mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-07-06 20:09:58 +00:00
WIP (clang is crashing)
This commit is contained in:
parent
1dc9f22d5b
commit
7745f208bc
@ -1,3 +1,3 @@
|
|||||||
[codespell]
|
[codespell]
|
||||||
ignore-words-list = blong, afterall, assistent, crasher, requestor
|
ignore-words-list = blong, afterall, assistent, crasher, requestor, nam
|
||||||
skip = ./.git,./gpt4all-chat/translations,*.pdf,*.svg,*.lock
|
skip = ./.git,./gpt4all-chat/translations,*.pdf,*.svg,*.lock
|
||||||
|
6
.gitmodules
vendored
6
.gitmodules
vendored
@ -26,3 +26,9 @@
|
|||||||
[submodule "gpt4all-backend/deps/qcoro"]
|
[submodule "gpt4all-backend/deps/qcoro"]
|
||||||
path = deps/qcoro
|
path = deps/qcoro
|
||||||
url = https://github.com/nomic-ai/qcoro.git
|
url = https://github.com/nomic-ai/qcoro.git
|
||||||
|
[submodule "gpt4all-backend/deps/date"]
|
||||||
|
path = gpt4all-backend/deps/date
|
||||||
|
url = https://github.com/HowardHinnant/date.git
|
||||||
|
[submodule "gpt4all-chat/deps/generator"]
|
||||||
|
path = gpt4all-chat/deps/generator
|
||||||
|
url = https://github.com/TartanLlama/generator.git
|
||||||
|
@ -49,7 +49,6 @@ You can **clone** an existing model, which allows you to save a configuration of
|
|||||||
|----------------------------|------------------------------------------|-----------|
|
|----------------------------|------------------------------------------|-----------|
|
||||||
| **Context Length** | Maximum length of input sequence in tokens | 2048 |
|
| **Context Length** | Maximum length of input sequence in tokens | 2048 |
|
||||||
| **Max Length** | Maximum length of response in tokens | 4096 |
|
| **Max Length** | Maximum length of response in tokens | 4096 |
|
||||||
| **Prompt Batch Size** | Token batch size for parallel processing | 128 |
|
|
||||||
| **Temperature** | Lower temperature gives more likely generations | 0.7 |
|
| **Temperature** | Lower temperature gives more likely generations | 0.7 |
|
||||||
| **Top P** | Prevents choosing highly unlikely tokens | 0.4 |
|
| **Top P** | Prevents choosing highly unlikely tokens | 0.4 |
|
||||||
| **Top K** | Size of selection pool for tokens | 40 |
|
| **Top K** | Size of selection pool for tokens | 40 |
|
||||||
|
@ -12,3 +12,5 @@ FetchContent_Declare(
|
|||||||
URL_HASH "SHA256=7da75f171837577a52bbf217e17f8ea576c7c246e4594d617bfde7fafd408be5"
|
URL_HASH "SHA256=7da75f171837577a52bbf217e17f8ea576c7c246e4594d617bfde7fafd408be5"
|
||||||
)
|
)
|
||||||
FetchContent_MakeAvailable(boost)
|
FetchContent_MakeAvailable(boost)
|
||||||
|
|
||||||
|
add_subdirectory(date)
|
||||||
|
1
gpt4all-backend/deps/date
Submodule
1
gpt4all-backend/deps/date
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 5bdb7e6f31fac909c090a46dbd9fea27b6e609a4
|
@ -26,7 +26,8 @@
|
|||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
MAKE_FORMATTER(QUtf8StringView, value );
|
MAKE_FORMATTER(QLatin1StringView, value );
|
||||||
MAKE_FORMATTER(QStringView, value.toUtf8() );
|
MAKE_FORMATTER(QString, value.toUtf8() );
|
||||||
MAKE_FORMATTER(QString, value.toUtf8() );
|
MAKE_FORMATTER(QStringView, value.toUtf8() );
|
||||||
MAKE_FORMATTER(QVariant, value.toString().toUtf8());
|
MAKE_FORMATTER(QUtf8StringView, value );
|
||||||
|
MAKE_FORMATTER(QVariant, value.toString().toUtf8());
|
||||||
|
@ -11,7 +11,6 @@
|
|||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QUrl>
|
#include <QUrl>
|
||||||
|
|
||||||
#include <cassert>
|
|
||||||
#include <expected>
|
#include <expected>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <variant>
|
#include <variant>
|
||||||
@ -26,26 +25,21 @@ namespace gpt4all::backend {
|
|||||||
struct ResponseError {
|
struct ResponseError {
|
||||||
public:
|
public:
|
||||||
struct BadStatus { int code; };
|
struct BadStatus { int code; };
|
||||||
|
|
||||||
private:
|
|
||||||
using ErrorCode = std::variant<
|
using ErrorCode = std::variant<
|
||||||
QNetworkReply::NetworkError,
|
QNetworkReply::NetworkError,
|
||||||
boost::system::error_code,
|
boost::system::error_code,
|
||||||
BadStatus
|
BadStatus
|
||||||
>;
|
>;
|
||||||
|
|
||||||
public:
|
|
||||||
ErrorCode error;
|
|
||||||
QString errorString;
|
|
||||||
|
|
||||||
ResponseError(const QRestReply *reply);
|
ResponseError(const QRestReply *reply);
|
||||||
|
ResponseError(const boost::system::system_error &e);
|
||||||
|
|
||||||
ResponseError(const boost::system::system_error &e)
|
const ErrorCode &error () { return m_error; }
|
||||||
: error(e.code())
|
const QString &errorString() { return m_errorString; }
|
||||||
, errorString(QString::fromUtf8(e.what()))
|
|
||||||
{
|
private:
|
||||||
assert(e.code());
|
ErrorCode m_error;
|
||||||
}
|
QString m_errorString;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
@ -21,6 +21,7 @@ target_link_libraries(${TARGET} PUBLIC
|
|||||||
)
|
)
|
||||||
target_link_libraries(${TARGET} PRIVATE
|
target_link_libraries(${TARGET} PRIVATE
|
||||||
QCoro6::Network
|
QCoro6::Network
|
||||||
|
date::date
|
||||||
fmt::fmt
|
fmt::fmt
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -13,7 +13,6 @@
|
|||||||
#include <QVariant>
|
#include <QVariant>
|
||||||
#include <QtAssert>
|
#include <QtAssert>
|
||||||
|
|
||||||
#include <coroutine>
|
|
||||||
#include <expected>
|
#include <expected>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
@ -28,13 +27,20 @@ namespace gpt4all::backend {
|
|||||||
ResponseError::ResponseError(const QRestReply *reply)
|
ResponseError::ResponseError(const QRestReply *reply)
|
||||||
{
|
{
|
||||||
if (reply->hasError()) {
|
if (reply->hasError()) {
|
||||||
error = reply->networkReply()->error();
|
m_error = reply->networkReply()->error();
|
||||||
} else if (!reply->isHttpStatusSuccess()) {
|
} else if (!reply->isHttpStatusSuccess()) {
|
||||||
error = BadStatus(reply->httpStatus());
|
m_error = BadStatus(reply->httpStatus());
|
||||||
} else
|
} else
|
||||||
Q_UNREACHABLE();
|
Q_UNREACHABLE();
|
||||||
|
|
||||||
errorString = restErrorString(*reply);
|
m_errorString = restErrorString(*reply);
|
||||||
|
}
|
||||||
|
|
||||||
|
ResponseError::ResponseError(const boost::system::system_error &e)
|
||||||
|
: m_error(e.code())
|
||||||
|
, m_errorString(QString::fromUtf8(e.what()))
|
||||||
|
{
|
||||||
|
Q_ASSERT(e.code());
|
||||||
}
|
}
|
||||||
|
|
||||||
QNetworkRequest OllamaClient::makeRequest(const QString &path) const
|
QNetworkRequest OllamaClient::makeRequest(const QString &path) const
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#include <fmt/chrono.h> // IWYU pragma: keep
|
#include <fmt/chrono.h> // IWYU pragma: keep
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
#include <date/date.h>
|
||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -40,7 +41,7 @@ Time tag_invoke(const json::value_to_tag<Time> &, const json::value &value)
|
|||||||
|
|
||||||
Time time;
|
Time time;
|
||||||
std::istringstream iss(json::string_view(value.as_string()));
|
std::istringstream iss(json::string_view(value.as_string()));
|
||||||
iss >> std::chrono::parse("%FT%T%Ez", time);
|
iss >> date::parse("%FT%T%Ez", time);
|
||||||
if (!iss && !iss.eof())
|
if (!iss && !iss.eof())
|
||||||
throw sys::system_error(std::make_error_code(std::errc::invalid_argument), __func__);
|
throw sys::system_error(std::make_error_code(std::errc::invalid_argument), __func__);
|
||||||
return time;
|
return time;
|
||||||
|
@ -227,9 +227,6 @@ if (APPLE)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
qt_add_executable(chat
|
qt_add_executable(chat
|
||||||
src/llmodel/provider.cpp src/llmodel/provider.h
|
|
||||||
src/llmodel/openai.cpp src/llmodel/openai.h
|
|
||||||
src/main.cpp
|
|
||||||
src/chat.cpp src/chat.h
|
src/chat.cpp src/chat.h
|
||||||
src/chatlistmodel.cpp src/chatlistmodel.h
|
src/chatlistmodel.cpp src/chatlistmodel.h
|
||||||
src/chatllm.cpp src/chatllm.h
|
src/chatllm.cpp src/chatllm.h
|
||||||
@ -241,14 +238,22 @@ qt_add_executable(chat
|
|||||||
src/embllm.cpp src/embllm.h
|
src/embllm.cpp src/embllm.h
|
||||||
src/jinja_helpers.cpp src/jinja_helpers.h
|
src/jinja_helpers.cpp src/jinja_helpers.h
|
||||||
src/jinja_replacements.cpp src/jinja_replacements.h
|
src/jinja_replacements.cpp src/jinja_replacements.h
|
||||||
|
src/json-helpers.cpp src/json-helpers.h
|
||||||
src/llm.cpp src/llm.h
|
src/llm.cpp src/llm.h
|
||||||
|
src/llmodel_chat.h
|
||||||
|
src/llmodel_ollama.cpp src/llmodel_ollama.h
|
||||||
|
src/llmodel_openai.cpp src/llmodel_openai.h
|
||||||
|
src/llmodel_provider.cpp src/llmodel_provider.h
|
||||||
src/localdocs.cpp src/localdocs.h
|
src/localdocs.cpp src/localdocs.h
|
||||||
src/localdocsmodel.cpp src/localdocsmodel.h
|
src/localdocsmodel.cpp src/localdocsmodel.h
|
||||||
src/logger.cpp src/logger.h
|
src/logger.cpp src/logger.h
|
||||||
|
src/main.cpp
|
||||||
src/modellist.cpp src/modellist.h
|
src/modellist.cpp src/modellist.h
|
||||||
src/mysettings.cpp src/mysettings.h
|
src/mysettings.cpp src/mysettings.h
|
||||||
src/network.cpp src/network.h
|
src/network.cpp src/network.h
|
||||||
src/server.cpp src/server.h
|
src/server.cpp src/server.h
|
||||||
|
src/store_base.cpp src/store_base.h
|
||||||
|
src/store_provider.cpp src/store_provider.h
|
||||||
src/tool.cpp src/tool.h
|
src/tool.cpp src/tool.h
|
||||||
src/toolcallparser.cpp src/toolcallparser.h
|
src/toolcallparser.cpp src/toolcallparser.h
|
||||||
src/toolmodel.cpp src/toolmodel.h
|
src/toolmodel.cpp src/toolmodel.h
|
||||||
@ -448,8 +453,9 @@ target_compile_definitions(chat PRIVATE QT_NO_SIGNALS_SLOTS_KEYWORDS)
|
|||||||
target_include_directories(chat PRIVATE deps/usearch/include
|
target_include_directories(chat PRIVATE deps/usearch/include
|
||||||
deps/usearch/fp16/include)
|
deps/usearch/fp16/include)
|
||||||
|
|
||||||
target_link_libraries(chat
|
target_link_libraries(chat PRIVATE
|
||||||
PRIVATE Qt6::Core Qt6::HttpServer Qt6::Quick Qt6::Sql Qt6::Svg)
|
Qt6::Core Qt6::HttpServer Qt6::Quick Qt6::Sql Qt6::Svg
|
||||||
|
)
|
||||||
if (GPT4ALL_USING_QTPDF)
|
if (GPT4ALL_USING_QTPDF)
|
||||||
target_compile_definitions(chat PRIVATE GPT4ALL_USE_QTPDF)
|
target_compile_definitions(chat PRIVATE GPT4ALL_USE_QTPDF)
|
||||||
target_link_libraries(chat PRIVATE Qt6::Pdf)
|
target_link_libraries(chat PRIVATE Qt6::Pdf)
|
||||||
@ -458,6 +464,7 @@ else()
|
|||||||
target_link_libraries(chat PRIVATE pdfium)
|
target_link_libraries(chat PRIVATE pdfium)
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(chat PRIVATE
|
target_link_libraries(chat PRIVATE
|
||||||
|
Boost::describe Boost::json Boost::system
|
||||||
QCoro6::Core QCoro6::Network
|
QCoro6::Core QCoro6::Network
|
||||||
QXlsx
|
QXlsx
|
||||||
SingleApplication
|
SingleApplication
|
||||||
@ -466,6 +473,7 @@ target_link_libraries(chat PRIVATE
|
|||||||
gpt4all-backend
|
gpt4all-backend
|
||||||
llmodel
|
llmodel
|
||||||
nlohmann_json::nlohmann_json
|
nlohmann_json::nlohmann_json
|
||||||
|
tl::generator
|
||||||
)
|
)
|
||||||
target_include_directories(chat PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/deps/minja/include)
|
target_include_directories(chat PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/deps/minja/include)
|
||||||
|
|
||||||
|
@ -14,6 +14,10 @@ add_subdirectory(QXlsx/QXlsx)
|
|||||||
|
|
||||||
add_subdirectory(json) # required by minja
|
add_subdirectory(json) # required by minja
|
||||||
|
|
||||||
|
# TartanLlama
|
||||||
|
set(FUNCTION_REF_ENABLE_TESTS OFF)
|
||||||
|
add_subdirectory(generator)
|
||||||
|
|
||||||
if (NOT GPT4ALL_USING_QTPDF)
|
if (NOT GPT4ALL_USING_QTPDF)
|
||||||
# If we do not use QtPDF, we need to get PDFium.
|
# If we do not use QtPDF, we need to get PDFium.
|
||||||
set(GPT4ALL_PDFIUM_TAG "chromium/6996")
|
set(GPT4ALL_PDFIUM_TAG "chromium/6996")
|
||||||
|
1
gpt4all-chat/deps/generator
Submodule
1
gpt4all-chat/deps/generator
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 2a912502de4f97dcba4f95c958ee0ddf7bc22cf5
|
@ -454,38 +454,6 @@ MySettingsTab {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MySettingsLabel {
|
|
||||||
id: nThreadsLabel
|
|
||||||
text: qsTr("CPU Threads")
|
|
||||||
helpText: qsTr("The number of CPU threads used for inference and embedding.")
|
|
||||||
Layout.row: 11
|
|
||||||
Layout.column: 0
|
|
||||||
}
|
|
||||||
MyTextField {
|
|
||||||
text: MySettings.threadCount
|
|
||||||
color: theme.textColor
|
|
||||||
font.pixelSize: theme.fontSizeLarge
|
|
||||||
Layout.alignment: Qt.AlignRight
|
|
||||||
Layout.row: 11
|
|
||||||
Layout.column: 2
|
|
||||||
Layout.minimumWidth: 200
|
|
||||||
Layout.maximumWidth: 200
|
|
||||||
validator: IntValidator {
|
|
||||||
bottom: 1
|
|
||||||
}
|
|
||||||
onEditingFinished: {
|
|
||||||
var val = parseInt(text)
|
|
||||||
if (!isNaN(val)) {
|
|
||||||
MySettings.threadCount = val
|
|
||||||
focus = false
|
|
||||||
} else {
|
|
||||||
text = MySettings.threadCount
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Accessible.role: Accessible.EditableText
|
|
||||||
Accessible.name: nThreadsLabel.text
|
|
||||||
Accessible.description: ToolTip.text
|
|
||||||
}
|
|
||||||
MySettingsLabel {
|
MySettingsLabel {
|
||||||
id: trayLabel
|
id: trayLabel
|
||||||
text: qsTr("Enable System Tray")
|
text: qsTr("Enable System Tray")
|
||||||
|
@ -791,53 +791,6 @@ MySettingsTab {
|
|||||||
Accessible.description: ToolTip.text
|
Accessible.description: ToolTip.text
|
||||||
}
|
}
|
||||||
|
|
||||||
MySettingsLabel {
|
|
||||||
id: batchSizeLabel
|
|
||||||
visible: !root.currentModelInfo.isOnline
|
|
||||||
text: qsTr("Prompt Batch Size")
|
|
||||||
helpText: qsTr("The batch size used for prompt processing.")
|
|
||||||
Layout.row: 1
|
|
||||||
Layout.column: 0
|
|
||||||
Layout.maximumWidth: 300 * theme.fontScale
|
|
||||||
}
|
|
||||||
MyTextField {
|
|
||||||
id: batchSizeField
|
|
||||||
visible: !root.currentModelInfo.isOnline
|
|
||||||
text: root.currentModelInfo.promptBatchSize
|
|
||||||
color: theme.textColor
|
|
||||||
font.pixelSize: theme.fontSizeLarge
|
|
||||||
ToolTip.text: qsTr("Amount of prompt tokens to process at once.\nNOTE: Higher values can speed up reading prompts but will use more RAM.")
|
|
||||||
ToolTip.visible: hovered
|
|
||||||
Layout.row: 1
|
|
||||||
Layout.column: 1
|
|
||||||
validator: IntValidator {
|
|
||||||
bottom: 1
|
|
||||||
}
|
|
||||||
Connections {
|
|
||||||
target: MySettings
|
|
||||||
function onPromptBatchSizeChanged() {
|
|
||||||
batchSizeField.text = root.currentModelInfo.promptBatchSize;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Connections {
|
|
||||||
target: root
|
|
||||||
function onCurrentModelInfoChanged() {
|
|
||||||
batchSizeField.text = root.currentModelInfo.promptBatchSize;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
onEditingFinished: {
|
|
||||||
var val = parseInt(text)
|
|
||||||
if (!isNaN(val)) {
|
|
||||||
MySettings.setModelPromptBatchSize(root.currentModelInfo, val)
|
|
||||||
focus = false
|
|
||||||
} else {
|
|
||||||
text = root.currentModelInfo.promptBatchSize
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Accessible.role: Accessible.EditableText
|
|
||||||
Accessible.name: batchSizeLabel.text
|
|
||||||
Accessible.description: ToolTip.text
|
|
||||||
}
|
|
||||||
MySettingsLabel {
|
MySettingsLabel {
|
||||||
id: repeatPenaltyLabel
|
id: repeatPenaltyLabel
|
||||||
visible: !root.currentModelInfo.isOnline
|
visible: !root.currentModelInfo.isOnline
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
|
|
||||||
static constexpr quint32 CHAT_FORMAT_MAGIC = 0xF5D553CC;
|
static constexpr quint32 CHAT_FORMAT_MAGIC = 0xF5D553CC;
|
||||||
static constexpr qint32 CHAT_FORMAT_VERSION = 12;
|
static constexpr qint32 CHAT_FORMAT_VERSION = 13;
|
||||||
|
|
||||||
class MyChatListModel: public ChatListModel { };
|
class MyChatListModel: public ChatListModel { };
|
||||||
Q_GLOBAL_STATIC(MyChatListModel, chatListModelInstance)
|
Q_GLOBAL_STATIC(MyChatListModel, chatListModelInstance)
|
||||||
|
@ -3,8 +3,9 @@
|
|||||||
#include "chat.h"
|
#include "chat.h"
|
||||||
#include "chatmodel.h"
|
#include "chatmodel.h"
|
||||||
#include "jinja_helpers.h"
|
#include "jinja_helpers.h"
|
||||||
#include "llmodel/chat.h"
|
#include "llmodel_chat.h"
|
||||||
#include "llmodel/openai.h"
|
#include "llmodel_description.h"
|
||||||
|
#include "llmodel_provider.h"
|
||||||
#include "localdocs.h"
|
#include "localdocs.h"
|
||||||
#include "mysettings.h"
|
#include "mysettings.h"
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
@ -12,6 +13,8 @@
|
|||||||
#include "toolcallparser.h"
|
#include "toolcallparser.h"
|
||||||
#include "toolmodel.h"
|
#include "toolmodel.h"
|
||||||
|
|
||||||
|
#include <QCoro/QCoroAsyncGenerator>
|
||||||
|
#include <QCoro/QCoroTask>
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
#include <gpt4all-backend/generation-params.h>
|
#include <gpt4all-backend/generation-params.h>
|
||||||
#include <minja/minja.hpp>
|
#include <minja/minja.hpp>
|
||||||
@ -118,10 +121,15 @@ public:
|
|||||||
virtual bool getStopGenerating () const = 0;
|
virtual bool getStopGenerating () const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct PromptModelWithToolsResult {
|
||||||
|
ChatResponseMetadata metadata;
|
||||||
|
QStringList toolCallBuffers;
|
||||||
|
bool shouldExecuteToolCall;
|
||||||
|
};
|
||||||
static auto promptModelWithTools(
|
static auto promptModelWithTools(
|
||||||
ChatLLModel *model, BaseResponseHandler &respHandler, const backend::GenerationParams ¶ms,
|
ChatLLMInstance *model, BaseResponseHandler &respHandler, const GenerationParams ¶ms, const QByteArray &prompt,
|
||||||
const QByteArray &prompt, const QStringList &toolNames
|
const QStringList &toolNames
|
||||||
) -> std::pair<QStringList, bool>
|
) -> QCoro::Task<PromptModelWithToolsResult>
|
||||||
{
|
{
|
||||||
ToolCallParser toolCallParser(toolNames);
|
ToolCallParser toolCallParser(toolNames);
|
||||||
auto handleResponse = [&toolCallParser, &respHandler](std::string_view piece) -> bool {
|
auto handleResponse = [&toolCallParser, &respHandler](std::string_view piece) -> bool {
|
||||||
@ -159,30 +167,31 @@ static auto promptModelWithTools(
|
|||||||
|
|
||||||
return !shouldExecuteToolCall && !respHandler.getStopGenerating();
|
return !shouldExecuteToolCall && !respHandler.getStopGenerating();
|
||||||
};
|
};
|
||||||
model->prompt(std::string_view(prompt), promptCallback, handleResponse, params);
|
ChatResponseMetadata metadata;
|
||||||
|
auto stream = model->generate(QString::fromUtf8(prompt), params, metadata);
|
||||||
|
QCORO_FOREACH(auto &piece, stream) {
|
||||||
|
if (!handleResponse(std::string_view(piece.toUtf8())))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete
|
const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete
|
||||||
&& toolCallParser.startTag() != ToolCallConstants::ThinkStartTag;
|
&& toolCallParser.startTag() != ToolCallConstants::ThinkStartTag;
|
||||||
|
|
||||||
return { toolCallParser.buffers(), shouldExecuteToolCall };
|
co_return { metadata, toolCallParser.buffers(), shouldExecuteToolCall };
|
||||||
}
|
}
|
||||||
|
|
||||||
class LLModelStore {
|
class LLModelStore {
|
||||||
public:
|
public:
|
||||||
static LLModelStore *globalInstance();
|
static LLModelStore *globalInstance();
|
||||||
|
|
||||||
LLModelInfo acquireModel(); // will block until llmodel is ready
|
auto acquireModel() -> std::unique_ptr<ChatLLMInstance>; // will block until llmodel is ready
|
||||||
void releaseModel(LLModelInfo &&info); // must be called when you are done
|
void releaseModel(std::unique_ptr<ChatLLMInstance> &&info); // must be called when you are done
|
||||||
void destroy();
|
void destroy();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
LLModelStore()
|
LLModelStore() { m_availableModel.emplace(); /* seed with empty model */ }
|
||||||
{
|
~LLModelStore() = default;
|
||||||
// seed with empty model
|
std::optional<std::unique_ptr<ChatLLMInstance>> m_availableModel;
|
||||||
m_availableModel = LLModelInfo();
|
|
||||||
}
|
|
||||||
~LLModelStore() {}
|
|
||||||
std::optional<LLModelInfo> m_availableModel;
|
|
||||||
QMutex m_mutex;
|
QMutex m_mutex;
|
||||||
QWaitCondition m_condition;
|
QWaitCondition m_condition;
|
||||||
friend class MyLLModelStore;
|
friend class MyLLModelStore;
|
||||||
@ -195,7 +204,7 @@ LLModelStore *LLModelStore::globalInstance()
|
|||||||
return storeInstance();
|
return storeInstance();
|
||||||
}
|
}
|
||||||
|
|
||||||
LLModelInfo LLModelStore::acquireModel()
|
auto LLModelStore::acquireModel() -> std::unique_ptr<ChatLLMInstance>
|
||||||
{
|
{
|
||||||
QMutexLocker locker(&m_mutex);
|
QMutexLocker locker(&m_mutex);
|
||||||
while (!m_availableModel)
|
while (!m_availableModel)
|
||||||
@ -205,7 +214,7 @@ LLModelInfo LLModelStore::acquireModel()
|
|||||||
return first;
|
return first;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LLModelStore::releaseModel(LLModelInfo &&info)
|
void LLModelStore::releaseModel(std::unique_ptr<ChatLLMInstance> &&info)
|
||||||
{
|
{
|
||||||
QMutexLocker locker(&m_mutex);
|
QMutexLocker locker(&m_mutex);
|
||||||
Q_ASSERT(!m_availableModel);
|
Q_ASSERT(!m_availableModel);
|
||||||
@ -219,11 +228,6 @@ void LLModelStore::destroy()
|
|||||||
m_availableModel.reset();
|
m_availableModel.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void LLModelInfo::resetModel(ChatLLM *cllm, ChatLLModel *model) {
|
|
||||||
this->model.reset(model);
|
|
||||||
emit cllm->loadedModelInfoChanged();
|
|
||||||
}
|
|
||||||
|
|
||||||
ChatLLM::ChatLLM(Chat *parent, bool isServer)
|
ChatLLM::ChatLLM(Chat *parent, bool isServer)
|
||||||
: QObject{nullptr}
|
: QObject{nullptr}
|
||||||
, m_chat(parent)
|
, m_chat(parent)
|
||||||
@ -264,9 +268,8 @@ void ChatLLM::destroy()
|
|||||||
|
|
||||||
// The only time we should have a model loaded here is on shutdown
|
// The only time we should have a model loaded here is on shutdown
|
||||||
// as we explicitly unload the model in all other circumstances
|
// as we explicitly unload the model in all other circumstances
|
||||||
if (isModelLoaded()) {
|
if (isModelLoaded())
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance.reset();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::destroyStore()
|
void ChatLLM::destroyStore()
|
||||||
@ -288,7 +291,7 @@ bool ChatLLM::loadDefaultModel()
|
|||||||
emit modelLoadingError(u"Could not find any model to load"_s);
|
emit modelLoadingError(u"Could not find any model to load"_s);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return loadModel(defaultModel);
|
return QCoro::waitFor(loadModel(defaultModel));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
|
void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
|
||||||
@ -305,24 +308,21 @@ void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
QString filePath = modelInfo.dirpath + modelInfo.filename();
|
|
||||||
QFileInfo fileInfo(filePath);
|
|
||||||
|
|
||||||
acquireModel();
|
acquireModel();
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llmInstance.get();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The store gave us no already loaded model, the wrong type of model, then give it back to the
|
// The store gave us no already loaded model, the wrong type of model, then give it back to the
|
||||||
// store and fail
|
// store and fail
|
||||||
if (!m_llModelInfo.model || m_llModelInfo.fileInfo != fileInfo || !m_shouldBeLoaded) {
|
if (!m_llmInstance || *m_llmInstance->description() != *modelInfo.modelDesc() || !m_shouldBeLoaded) {
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
emit trySwitchContextOfLoadedModelCompleted(0);
|
emit trySwitchContextOfLoadedModelCompleted(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "store had our model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
qDebug() << "store had our model" << m_llmThread.objectName() << m_llmInstance.model.get();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
emit trySwitchContextOfLoadedModelCompleted(2);
|
emit trySwitchContextOfLoadedModelCompleted(2);
|
||||||
@ -330,233 +330,119 @@ void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
|
|||||||
emit trySwitchContextOfLoadedModelCompleted(0);
|
emit trySwitchContextOfLoadedModelCompleted(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ChatLLM::loadModel(const ModelInfo &modelInfo)
|
// TODO: always call with a resource guard held since this didn't previously use coroutines
|
||||||
|
auto ChatLLM::loadModel(const ModelInfo &modelInfo) -> QCoro::Task<bool>
|
||||||
{
|
{
|
||||||
// This is a complicated method because N different possible threads are interested in the outcome
|
// TODO: get the description from somewhere
|
||||||
// of this method. Why? Because we have a main/gui thread trying to monitor the state of N different
|
bool alreadyAcquired = isModelLoaded();
|
||||||
// possible chat threads all vying for a single resource - the currently loaded model - as the user
|
if (alreadyAcquired && *modelInfo.modelDesc() == *m_modelInfo.modelDesc()) {
|
||||||
// switches back and forth between chats. It is important for our main/gui thread to never block
|
|
||||||
// but simultaneously always have up2date information with regards to which chat has the model loaded
|
|
||||||
// and what the type and name of that model is. I've tried to comment extensively in this method
|
|
||||||
// to provide an overview of what we're doing here.
|
|
||||||
|
|
||||||
if (isModelLoaded() && this->modelInfo() == modelInfo) {
|
|
||||||
// already acquired -> keep it
|
// already acquired -> keep it
|
||||||
return true; // already loaded
|
if (modelInfo != m_modelInfo) {
|
||||||
|
// switch to different clone of same model
|
||||||
|
Q_ASSERT(modelInfo.isClone() || m_modelInfo.isClone());
|
||||||
|
m_modelInfo = modelInfo;
|
||||||
|
}
|
||||||
|
co_return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset status
|
// reset status
|
||||||
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
||||||
emit modelLoadingError("");
|
emit modelLoadingError("");
|
||||||
|
|
||||||
QString filePath = modelInfo.dirpath + modelInfo.filename();
|
|
||||||
QFileInfo fileInfo(filePath);
|
|
||||||
|
|
||||||
// We have a live model, but it isn't the one we want
|
|
||||||
bool alreadyAcquired = isModelLoaded();
|
|
||||||
if (alreadyAcquired) {
|
if (alreadyAcquired) {
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
// we own a different model -> destroy it and load the requested one
|
||||||
qDebug() << "already acquired model deleted" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
m_llmInstance.reset();
|
||||||
#endif
|
} else if (!m_isServer) { // (the server loads models lazily rather than eagerly)
|
||||||
m_llModelInfo.resetModel(this);
|
// wait for the model to become available
|
||||||
} else if (!m_isServer) {
|
acquireModel(); // (blocks)
|
||||||
// This is a blocking call that tries to retrieve the model we need from the model store.
|
|
||||||
// If it succeeds, then we just have to restore state. If the store has never had a model
|
// check if request was canceled while we were waiting
|
||||||
// returned to it, then the modelInfo.model pointer should be null which will happen on startup
|
|
||||||
acquireModel();
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
|
||||||
qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
// At this point it is possible that while we were blocked waiting to acquire the model from the
|
|
||||||
// store, that our state was changed to not be loaded. If this is the case, release the model
|
|
||||||
// back into the store and quit loading
|
|
||||||
if (!m_shouldBeLoaded) {
|
if (!m_shouldBeLoaded) {
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
qDebug() << "no longer need model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
|
||||||
emit modelLoadingPercentageChanged(0.0f);
|
emit modelLoadingPercentageChanged(0.0f);
|
||||||
return false;
|
co_return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the store just gave us exactly the model we were looking for
|
// if it was the requested model, we are done
|
||||||
if (m_llModelInfo.model && m_llModelInfo.fileInfo == fileInfo) {
|
if (m_llmInstance && *m_llmInstance->description() == *modelInfo.modelDesc()) {
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
|
||||||
qDebug() << "store had our model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
emit modelLoadingPercentageChanged(1.0f);
|
emit modelLoadingPercentageChanged(1.0f);
|
||||||
setModelInfo(modelInfo);
|
setModelInfo(modelInfo);
|
||||||
Q_ASSERT(!m_modelInfo.filename().isEmpty());
|
Q_ASSERT(!m_modelInfo.filename().isEmpty());
|
||||||
if (m_modelInfo.filename().isEmpty())
|
if (m_modelInfo.filename().isEmpty())
|
||||||
emit modelLoadingError(u"Modelinfo is left null for %1"_s.arg(modelInfo.filename()));
|
emit modelLoadingError(u"Modelinfo is left null for %1"_s.arg(modelInfo.filename()));
|
||||||
return true;
|
co_return true;
|
||||||
} else {
|
|
||||||
// Release the memory since we have to switch to a different model.
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
|
||||||
qDebug() << "deleting model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
m_llModelInfo.resetModel(this);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// we own a different model -> destroy it and load the requested one
|
||||||
|
m_llmInstance.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Guarantee we've released the previous models memory
|
QVariantMap modelLoadProps;
|
||||||
Q_ASSERT(!m_llModelInfo.model);
|
if (!co_await loadNewModel(modelInfo, modelLoadProps))
|
||||||
|
co_return false; // m_shouldBeLoaded became false
|
||||||
|
|
||||||
// Store the file info in the modelInfo in case we have an error loading
|
emit modelLoadingPercentageChanged(isModelLoaded() ? 1.0f : 0.0f);
|
||||||
m_llModelInfo.fileInfo = fileInfo;
|
emit loadedModelInfoChanged();
|
||||||
|
|
||||||
if (fileInfo.exists()) {
|
modelLoadProps.insert("model", modelInfo.filename());
|
||||||
QVariantMap modelLoadProps;
|
Network::globalInstance()->trackChatEvent("model_load", modelLoadProps);
|
||||||
if (modelInfo.isOnline) {
|
|
||||||
QString apiKey;
|
|
||||||
QString requestUrl;
|
|
||||||
QString modelName;
|
|
||||||
{
|
|
||||||
QFile file(filePath);
|
|
||||||
bool success = file.open(QIODeviceBase::ReadOnly);
|
|
||||||
(void)success;
|
|
||||||
Q_ASSERT(success);
|
|
||||||
QJsonDocument doc = QJsonDocument::fromJson(file.readAll());
|
|
||||||
QJsonObject obj = doc.object();
|
|
||||||
apiKey = obj["apiKey"].toString();
|
|
||||||
modelName = obj["modelName"].toString();
|
|
||||||
if (modelInfo.isCompatibleApi) {
|
|
||||||
QString baseUrl(obj["baseUrl"].toString());
|
|
||||||
QUrl apiUrl(QUrl::fromUserInput(baseUrl));
|
|
||||||
if (!Network::isHttpUrlValid(apiUrl)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
QString currentPath(apiUrl.path());
|
|
||||||
QString suffixPath("%1/chat/completions");
|
|
||||||
apiUrl.setPath(suffixPath.arg(currentPath));
|
|
||||||
requestUrl = apiUrl.toString();
|
|
||||||
} else {
|
|
||||||
requestUrl = modelInfo.url();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m_llModelType = LLModelTypeV1::API;
|
|
||||||
ChatAPI *model = new ChatAPI();
|
|
||||||
model->setModelName(modelName);
|
|
||||||
model->setRequestURL(requestUrl);
|
|
||||||
model->setAPIKey(apiKey);
|
|
||||||
m_llModelInfo.resetModel(this, model);
|
|
||||||
} else if (!loadNewModel(modelInfo, modelLoadProps)) {
|
|
||||||
return false; // m_shouldBeLoaded became false
|
|
||||||
}
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
|
||||||
qDebug() << "new model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
#if defined(DEBUG)
|
|
||||||
qDebug() << "modelLoadedChanged" << m_llmThread.objectName();
|
|
||||||
fflush(stdout);
|
|
||||||
#endif
|
|
||||||
emit modelLoadingPercentageChanged(isModelLoaded() ? 1.0f : 0.0f);
|
|
||||||
emit loadedModelInfoChanged();
|
|
||||||
|
|
||||||
modelLoadProps.insert("model", modelInfo.filename());
|
if (m_llmInstance)
|
||||||
Network::globalInstance()->trackChatEvent("model_load", modelLoadProps);
|
|
||||||
} else {
|
|
||||||
if (!m_isServer)
|
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo)); // release back into the store
|
|
||||||
resetModel();
|
|
||||||
emit modelLoadingError(u"Could not find file for model %1"_s.arg(modelInfo.filename()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_llModelInfo.model)
|
|
||||||
setModelInfo(modelInfo);
|
setModelInfo(modelInfo);
|
||||||
return bool(m_llModelInfo.model);
|
co_return bool(m_llmInstance);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns false if the model should no longer be loaded (!m_shouldBeLoaded).
|
/* Returns false if the model should no longer be loaded (!m_shouldBeLoaded).
|
||||||
* Otherwise returns true, even on error. */
|
* Otherwise returns true, even on error. */
|
||||||
bool ChatLLM::loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps)
|
auto ChatLLM::loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps) -> QCoro::Task<bool>
|
||||||
{
|
{
|
||||||
|
auto *mysettings = MySettings::globalInstance();
|
||||||
|
|
||||||
QElapsedTimer modelLoadTimer;
|
QElapsedTimer modelLoadTimer;
|
||||||
modelLoadTimer.start();
|
modelLoadTimer.start();
|
||||||
|
|
||||||
int n_ctx = MySettings::globalInstance()->modelContextLength(modelInfo);
|
// TODO: pass these as generation params
|
||||||
int ngl = MySettings::globalInstance()->modelGpuLayers(modelInfo);
|
int n_ctx = mysettings->modelContextLength(modelInfo);
|
||||||
|
int ngl = mysettings->modelGpuLayers (modelInfo);
|
||||||
|
|
||||||
std::string backend = "auto";
|
m_llmInstance = modelInfo.modelDesc()->newInstance(&m_nam);
|
||||||
QString filePath = modelInfo.dirpath + modelInfo.filename();
|
|
||||||
|
|
||||||
auto construct = [this, &filePath, &modelInfo, &modelLoadProps, n_ctx]() {
|
// TODO: progress callback
|
||||||
QString constructError;
|
#if 0
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance->setProgressCallback([this](float progress) -> bool {
|
||||||
auto *model = LLModel::Implementation::construct(filePath.toStdString(), "", n_ctx);
|
progress = std::max(progress, std::numeric_limits<float>::min()); // keep progress above zero
|
||||||
m_llModelInfo.resetModel(this, model);
|
emit modelLoadingPercentageChanged(progress);
|
||||||
|
return m_shouldBeLoaded;
|
||||||
if (!m_llModelInfo.model) {
|
});
|
||||||
if (!m_isServer)
|
#endif
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
co_await m_llmInstance->preload();
|
||||||
resetModel();
|
|
||||||
emit modelLoadingError(u"Error loading %1: %2"_s.arg(modelInfo.filename(), constructError));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_llModelInfo.model->setProgressCallback([this](float progress) -> bool {
|
|
||||||
progress = std::max(progress, std::numeric_limits<float>::min()); // keep progress above zero
|
|
||||||
emit modelLoadingPercentageChanged(progress);
|
|
||||||
return m_shouldBeLoaded;
|
|
||||||
});
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!construct())
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (m_llModelInfo.model->isModelBlacklisted(filePath.toStdString())) {
|
|
||||||
static QSet<QString> warned;
|
|
||||||
auto fname = modelInfo.filename();
|
|
||||||
if (!warned.contains(fname)) {
|
|
||||||
emit modelLoadingWarning(
|
|
||||||
u"%1 is known to be broken. Please get a replacement via the download dialog."_s.arg(fname)
|
|
||||||
);
|
|
||||||
warned.insert(fname); // don't warn again until restart
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, ngl);
|
|
||||||
|
|
||||||
if (!m_shouldBeLoaded) {
|
if (!m_shouldBeLoaded) {
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance.reset();
|
||||||
if (!m_isServer)
|
if (!m_isServer)
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
resetModel();
|
resetModel();
|
||||||
emit modelLoadingPercentageChanged(0.0f);
|
emit modelLoadingPercentageChanged(0.0f);
|
||||||
return false;
|
co_return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool success = true; // TODO: check for failure
|
||||||
if (!success) {
|
if (!success) {
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance.reset();
|
||||||
if (!m_isServer)
|
if (!m_isServer)
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
resetModel();
|
resetModel();
|
||||||
emit modelLoadingError(u"Could not load model due to invalid model file for %1"_s.arg(modelInfo.filename()));
|
emit modelLoadingError(u"Could not load model due to invalid model file for %1"_s.arg(modelInfo.filename()));
|
||||||
modelLoadProps.insert("error", "loadmodel_failed");
|
modelLoadProps.insert("error", "loadmodel_failed");
|
||||||
return true;
|
co_return true;
|
||||||
}
|
|
||||||
|
|
||||||
switch (m_llModelInfo.model->implementation().modelType()[0]) {
|
|
||||||
case 'L': m_llModelType = LLModelTypeV1::LLAMA; break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
m_llModelInfo.resetModel(this);
|
|
||||||
if (!m_isServer)
|
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
|
||||||
resetModel();
|
|
||||||
emit modelLoadingError(u"Could not determine model type for %1"_s.arg(modelInfo.filename()));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
modelLoadProps.insert("$duration", modelLoadTimer.elapsed() / 1000.);
|
modelLoadProps.insert("$duration", modelLoadTimer.elapsed() / 1000.);
|
||||||
return true;
|
co_return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ChatLLM::isModelLoaded() const
|
bool ChatLLM::isModelLoaded() const
|
||||||
{
|
{ return bool(m_llmInstance); }
|
||||||
return m_llModelInfo.model && m_llModelInfo.model->isModelLoaded();
|
|
||||||
}
|
|
||||||
|
|
||||||
static QString &removeLeadingWhitespace(QString &s)
|
static QString &removeLeadingWhitespace(QString &s)
|
||||||
{
|
{
|
||||||
@ -599,50 +485,34 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::acquireModel()
|
void ChatLLM::acquireModel()
|
||||||
{
|
{ m_llmInstance = LLModelStore::globalInstance()->acquireModel(); }
|
||||||
m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
|
|
||||||
emit loadedModelInfoChanged();
|
|
||||||
}
|
|
||||||
|
|
||||||
void ChatLLM::resetModel()
|
void ChatLLM::resetModel()
|
||||||
{
|
{ m_llmInstance.reset(); }
|
||||||
m_llModelInfo = {};
|
|
||||||
emit loadedModelInfoChanged();
|
|
||||||
}
|
|
||||||
|
|
||||||
void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
|
void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
|
||||||
{
|
{
|
||||||
// ignore attempts to switch to the same model twice
|
// ignore attempts to switch to the same model twice
|
||||||
if (!isModelLoaded() || this->modelInfo() != modelInfo) {
|
if (!isModelLoaded() || this->modelInfo() != modelInfo) {
|
||||||
m_shouldBeLoaded = true;
|
m_shouldBeLoaded = true;
|
||||||
loadModel(modelInfo);
|
QCoro::waitFor(loadModel(modelInfo));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static backend::GenerationParams genParamsFromSettings(const ModelInfo &modelInfo)
|
auto ChatLLM::modelDescription() -> const ModelDescription *
|
||||||
{
|
{ return m_llmInstance->description(); }
|
||||||
auto *mySettings = MySettings::globalInstance();
|
|
||||||
return {
|
|
||||||
.n_predict = mySettings->modelMaxLength (modelInfo),
|
|
||||||
.top_k = mySettings->modelTopK (modelInfo),
|
|
||||||
.top_p = float(mySettings->modelTopP (modelInfo)),
|
|
||||||
.min_p = float(mySettings->modelMinP (modelInfo)),
|
|
||||||
.temp = float(mySettings->modelTemperature (modelInfo)),
|
|
||||||
.n_batch = mySettings->modelPromptBatchSize (modelInfo),
|
|
||||||
.repeat_penalty = float(mySettings->modelRepeatPenalty(modelInfo)),
|
|
||||||
.repeat_last_n = mySettings->modelRepeatPenaltyTokens(modelInfo),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
void ChatLLM::prompt(const QStringList &enabledCollections)
|
void ChatLLM::prompt(const QStringList &enabledCollections)
|
||||||
{
|
{
|
||||||
|
auto *mySettings = MySettings::globalInstance();
|
||||||
|
|
||||||
if (!isModelLoaded()) {
|
if (!isModelLoaded()) {
|
||||||
emit responseStopped(0);
|
emit responseStopped(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
promptInternalChat(enabledCollections, genParamsFromSettings(m_modelInfo));
|
promptInternalChat(enabledCollections, mySettings->modelGenParams(m_modelInfo));
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
// FIXME(jared): this is neither translated nor serialized
|
// FIXME(jared): this is neither translated nor serialized
|
||||||
m_chatModel->setResponseValue(u"Error: %1"_s.arg(QString::fromUtf8(e.what())));
|
m_chatModel->setResponseValue(u"Error: %1"_s.arg(QString::fromUtf8(e.what())));
|
||||||
@ -706,7 +576,6 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const MessageItem> items) cons
|
|||||||
Q_ASSERT(items.size() >= 1);
|
Q_ASSERT(items.size() >= 1);
|
||||||
|
|
||||||
auto *mySettings = MySettings::globalInstance();
|
auto *mySettings = MySettings::globalInstance();
|
||||||
auto &model = m_llModelInfo.model;
|
|
||||||
|
|
||||||
QString chatTemplate, systemMessage;
|
QString chatTemplate, systemMessage;
|
||||||
auto chatTemplateSetting = mySettings->modelChatTemplate(m_modelInfo);
|
auto chatTemplateSetting = mySettings->modelChatTemplate(m_modelInfo);
|
||||||
@ -756,8 +625,11 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const MessageItem> items) cons
|
|||||||
{ "add_generation_prompt", true },
|
{ "add_generation_prompt", true },
|
||||||
{ "toolList", toolList },
|
{ "toolList", toolList },
|
||||||
};
|
};
|
||||||
for (auto &[name, token] : model->specialTokens())
|
// TODO: implement special tokens
|
||||||
|
#if 0
|
||||||
|
for (auto &[name, token] : m_llmInstance->specialTokens())
|
||||||
params.emplace(std::move(name), std::move(token));
|
params.emplace(std::move(name), std::move(token));
|
||||||
|
#endif
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto tmpl = loadJinjaTemplate(chatTemplate.toStdString());
|
auto tmpl = loadJinjaTemplate(chatTemplate.toStdString());
|
||||||
@ -769,7 +641,7 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const MessageItem> items) cons
|
|||||||
Q_UNREACHABLE();
|
Q_UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const backend::GenerationParams ¶ms,
|
auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const GenerationParams ¶ms,
|
||||||
qsizetype startOffset) -> ChatPromptResult
|
qsizetype startOffset) -> ChatPromptResult
|
||||||
{
|
{
|
||||||
Q_ASSERT(isModelLoaded());
|
Q_ASSERT(isModelLoaded());
|
||||||
@ -876,10 +748,9 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
auto ChatLLM::promptInternal(
|
auto ChatLLM::promptInternal(
|
||||||
const std::variant<std::span<const MessageItem>, std::string_view> &prompt,
|
const std::variant<std::span<const MessageItem>, std::string_view> &prompt, const GenerationParams ¶ms,
|
||||||
const backend::GenerationParams params,
|
|
||||||
bool usedLocalDocs
|
bool usedLocalDocs
|
||||||
) -> PromptResult
|
) -> QCoro::Task<PromptResult>
|
||||||
{
|
{
|
||||||
Q_ASSERT(isModelLoaded());
|
Q_ASSERT(isModelLoaded());
|
||||||
|
|
||||||
@ -897,22 +768,6 @@ auto ChatLLM::promptInternal(
|
|||||||
conversation = jinjaBuffer;
|
conversation = jinjaBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check for overlength last message
|
|
||||||
if (!dynamic_cast<const ChatAPI *>(m_llModelInfo.model.get())) {
|
|
||||||
auto nCtx = m_llModelInfo.model->contextLength();
|
|
||||||
std::string jinjaBuffer2;
|
|
||||||
auto lastMessageRendered = (messageItems && messageItems->size() > 1)
|
|
||||||
? std::string_view(jinjaBuffer2 = applyJinjaTemplate({ &messageItems->back(), 1 }))
|
|
||||||
: conversation;
|
|
||||||
int32_t lastMessageLength = m_llModelInfo.model->countPromptTokens(lastMessageRendered);
|
|
||||||
if (auto limit = nCtx - 4; lastMessageLength > limit) {
|
|
||||||
throw std::invalid_argument(
|
|
||||||
tr("Your message was too long and could not be processed (%1 > %2). "
|
|
||||||
"Please try again with something shorter.").arg(lastMessageLength).arg(limit).toUtf8().constData()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PromptResult result {};
|
PromptResult result {};
|
||||||
|
|
||||||
QElapsedTimer totalTime;
|
QElapsedTimer totalTime;
|
||||||
@ -920,16 +775,14 @@ auto ChatLLM::promptInternal(
|
|||||||
ChatViewResponseHandler respHandler(this, &totalTime, &result);
|
ChatViewResponseHandler respHandler(this, &totalTime, &result);
|
||||||
|
|
||||||
m_timer->start();
|
m_timer->start();
|
||||||
QStringList finalBuffers;
|
PromptModelWithToolsResult withToolsResult;
|
||||||
bool shouldExecuteTool;
|
|
||||||
try {
|
try {
|
||||||
emit promptProcessing();
|
emit promptProcessing();
|
||||||
m_llModelInfo.model->setThreadCount(mySettings->threadCount());
|
|
||||||
m_stopGenerating = false;
|
m_stopGenerating = false;
|
||||||
// TODO: set result.promptTokens based on ollama prompt_eval_count
|
// TODO: set result.promptTokens based on ollama prompt_eval_count
|
||||||
// TODO: support interruption via m_stopGenerating
|
// TODO: support interruption via m_stopGenerating
|
||||||
std::tie(finalBuffers, shouldExecuteTool) = promptModelWithTools(
|
withToolsResult = co_await promptModelWithTools(
|
||||||
m_llModelInfo.model.get(), handlePrompt, respHandler, params,
|
m_llmInstance.get(), respHandler, params,
|
||||||
QByteArray::fromRawData(conversation.data(), conversation.size()),
|
QByteArray::fromRawData(conversation.data(), conversation.size()),
|
||||||
ToolCallConstants::AllTagNames
|
ToolCallConstants::AllTagNames
|
||||||
);
|
);
|
||||||
@ -937,6 +790,8 @@ auto ChatLLM::promptInternal(
|
|||||||
m_timer->stop();
|
m_timer->stop();
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
// TODO: use metadata
|
||||||
|
auto &[metadata, finalBuffers, shouldExecuteTool] = withToolsResult;
|
||||||
|
|
||||||
m_timer->stop();
|
m_timer->stop();
|
||||||
qint64 elapsed = totalTime.elapsed();
|
qint64 elapsed = totalTime.elapsed();
|
||||||
@ -964,13 +819,13 @@ auto ChatLLM::promptInternal(
|
|||||||
else
|
else
|
||||||
emit responseStopped(elapsed);
|
emit responseStopped(elapsed);
|
||||||
|
|
||||||
return result;
|
co_return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::setShouldBeLoaded(bool b)
|
void ChatLLM::setShouldBeLoaded(bool b)
|
||||||
{
|
{
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "setShouldBeLoaded" << m_llmThread.objectName() << b << m_llModelInfo.model.get();
|
qDebug() << "setShouldBeLoaded" << m_llmThread.objectName() << b << m_llmInstance.model.get();
|
||||||
#endif
|
#endif
|
||||||
m_shouldBeLoaded = b; // atomic
|
m_shouldBeLoaded = b; // atomic
|
||||||
emit shouldBeLoadedChanged();
|
emit shouldBeLoadedChanged();
|
||||||
@ -1001,15 +856,15 @@ void ChatLLM::unloadModel()
|
|||||||
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
||||||
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "unloadModel" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
qDebug() << "unloadModel" << m_llmThread.objectName() << m_llmInstance.model.get();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (m_forceUnloadModel) {
|
if (m_forceUnloadModel) {
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance.reset();
|
||||||
m_forceUnloadModel = false;
|
m_forceUnloadModel = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::reloadModel()
|
void ChatLLM::reloadModel()
|
||||||
@ -1021,13 +876,13 @@ void ChatLLM::reloadModel()
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "reloadModel" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
qDebug() << "reloadModel" << m_llmThread.objectName() << m_llmInstance.model.get();
|
||||||
#endif
|
#endif
|
||||||
const ModelInfo m = modelInfo();
|
const ModelInfo m = modelInfo();
|
||||||
if (m.name().isEmpty())
|
if (m.name().isEmpty())
|
||||||
loadDefaultModel();
|
loadDefaultModel();
|
||||||
else
|
else
|
||||||
loadModel(m);
|
QCoro::waitFor(loadModel(m));
|
||||||
}
|
}
|
||||||
|
|
||||||
// This class throws discards the text within thinking tags, for use with chat names and follow-up questions.
|
// This class throws discards the text within thinking tags, for use with chat names and follow-up questions.
|
||||||
@ -1111,8 +966,8 @@ void ChatLLM::generateName()
|
|||||||
try {
|
try {
|
||||||
// TODO: support interruption via m_stopGenerating
|
// TODO: support interruption via m_stopGenerating
|
||||||
promptModelWithTools(
|
promptModelWithTools(
|
||||||
m_llModelInfo.model.get(),
|
m_llmInstance.get(),
|
||||||
respHandler, genParamsFromSettings(m_modelInfo),
|
respHandler, mySettings->modelGenParams(m_modelInfo),
|
||||||
applyJinjaTemplate(forkConversation(chatNamePrompt)).c_str(),
|
applyJinjaTemplate(forkConversation(chatNamePrompt)).c_str(),
|
||||||
{ ToolCallConstants::ThinkTagName }
|
{ ToolCallConstants::ThinkTagName }
|
||||||
);
|
);
|
||||||
@ -1187,8 +1042,8 @@ void ChatLLM::generateQuestions(qint64 elapsed)
|
|||||||
try {
|
try {
|
||||||
// TODO: support interruption via m_stopGenerating
|
// TODO: support interruption via m_stopGenerating
|
||||||
promptModelWithTools(
|
promptModelWithTools(
|
||||||
m_llModelInfo.model.get(),
|
m_llmInstance.get(),
|
||||||
respHandler, genParamsFromSettings(m_modelInfo),
|
respHandler, mySettings->modelGenParams(m_modelInfo),
|
||||||
applyJinjaTemplate(forkConversation(suggestedFollowUpPrompt)).c_str(),
|
applyJinjaTemplate(forkConversation(suggestedFollowUpPrompt)).c_str(),
|
||||||
{ ToolCallConstants::ThinkTagName }
|
{ ToolCallConstants::ThinkTagName }
|
||||||
);
|
);
|
||||||
@ -1199,39 +1054,13 @@ void ChatLLM::generateQuestions(qint64 elapsed)
|
|||||||
emit responseStopped(elapsed);
|
emit responseStopped(elapsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// this function serialized the cached model state to disk.
|
|
||||||
// we want to also serialize n_ctx, and read it at load time.
|
|
||||||
bool ChatLLM::serialize(QDataStream &stream, int version)
|
bool ChatLLM::serialize(QDataStream &stream, int version)
|
||||||
{
|
{
|
||||||
if (version < 11) {
|
static constexpr int VERSION_MIN = 13;
|
||||||
if (version >= 6) {
|
if (version < VERSION_MIN)
|
||||||
stream << false; // serializeKV
|
throw std::runtime_error(fmt::format("ChatLLM does not support serializing as version {} (min is {})",
|
||||||
}
|
version, VERSION_MIN));
|
||||||
if (version >= 2) {
|
// nothing to do here; ChatLLM doesn't serialize any state itself anymore
|
||||||
if (m_llModelType == LLModelTypeV1::NONE) {
|
|
||||||
qWarning() << "ChatLLM ERROR: attempted to serialize a null model for chat id" << m_chat->id()
|
|
||||||
<< "name" << m_chat->name();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
stream << m_llModelType;
|
|
||||||
stream << 0; // state version
|
|
||||||
}
|
|
||||||
{
|
|
||||||
QString dummy;
|
|
||||||
stream << dummy; // response
|
|
||||||
stream << dummy; // generated name
|
|
||||||
}
|
|
||||||
stream << quint32(0); // prompt + response tokens
|
|
||||||
|
|
||||||
if (version < 6) { // serialize binary state
|
|
||||||
if (version < 4) {
|
|
||||||
stream << 0; // responseLogits
|
|
||||||
}
|
|
||||||
stream << int32_t(0); // n_past
|
|
||||||
stream << quint64(0); // input token count
|
|
||||||
stream << QByteArray(); // KV cache state
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return stream.status() == QDataStream::Ok;
|
return stream.status() == QDataStream::Ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#include "chatmodel.h"
|
#include "chatmodel.h"
|
||||||
#include "database.h"
|
#include "database.h"
|
||||||
#include "llmodel/chat.h"
|
#include "llmodel_chat.h"
|
||||||
#include "modellist.h"
|
#include "modellist.h"
|
||||||
|
|
||||||
#include <QByteArray>
|
#include <QByteArray>
|
||||||
@ -31,6 +31,7 @@ using namespace Qt::Literals::StringLiterals;
|
|||||||
|
|
||||||
class ChatLLM;
|
class ChatLLM;
|
||||||
class QDataStream;
|
class QDataStream;
|
||||||
|
namespace QCoro { template <typename T> class Task; }
|
||||||
|
|
||||||
|
|
||||||
// NOTE: values serialized to disk, do not change or reuse
|
// NOTE: values serialized to disk, do not change or reuse
|
||||||
@ -89,12 +90,6 @@ inline LLModelTypeV1 parseLLModelTypeV0(int v0)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct LLModelInfo {
|
|
||||||
std::unique_ptr<gpt4all::ui::ChatLLModel> model;
|
|
||||||
QFileInfo fileInfo;
|
|
||||||
void resetModel(ChatLLM *cllm, gpt4all::ui::ChatLLModel *model = nullptr);
|
|
||||||
};
|
|
||||||
|
|
||||||
class TokenTimer : public QObject {
|
class TokenTimer : public QObject {
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
public:
|
public:
|
||||||
@ -173,7 +168,7 @@ public Q_SLOTS:
|
|||||||
void prompt(const QStringList &enabledCollections);
|
void prompt(const QStringList &enabledCollections);
|
||||||
bool loadDefaultModel();
|
bool loadDefaultModel();
|
||||||
void trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
|
void trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
|
||||||
bool loadModel(const ModelInfo &modelInfo);
|
auto loadModel(const ModelInfo &modelInfo) -> QCoro::Task<bool>;
|
||||||
void modelChangeRequested(const ModelInfo &modelInfo);
|
void modelChangeRequested(const ModelInfo &modelInfo);
|
||||||
void unloadModel();
|
void unloadModel();
|
||||||
void reloadModel();
|
void reloadModel();
|
||||||
@ -215,14 +210,16 @@ protected:
|
|||||||
QList<ResultInfo> databaseResults;
|
QList<ResultInfo> databaseResults;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto promptInternalChat(const QStringList &enabledCollections, const gpt4all::backend::GenerationParams ¶ms,
|
auto modelDescription() -> const gpt4all::ui::ModelDescription *;
|
||||||
|
|
||||||
|
auto promptInternalChat(const QStringList &enabledCollections, const gpt4all::ui::GenerationParams ¶ms,
|
||||||
qsizetype startOffset = 0) -> ChatPromptResult;
|
qsizetype startOffset = 0) -> ChatPromptResult;
|
||||||
// passing a string_view directly skips templating and uses the raw string
|
// passing a string_view directly skips templating and uses the raw string
|
||||||
auto promptInternal(const std::variant<std::span<const MessageItem>, std::string_view> &prompt,
|
auto promptInternal(const std::variant<std::span<const MessageItem>, std::string_view> &prompt,
|
||||||
const gpt4all::backend::GenerationParams ¶ms, bool usedLocalDocs) -> PromptResult;
|
const gpt4all::ui::GenerationParams ¶ms, bool usedLocalDocs) -> QCoro::Task<PromptResult>;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);
|
auto loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps) -> QCoro::Task<bool>;
|
||||||
|
|
||||||
std::vector<MessageItem> forkConversation(const QString &prompt) const;
|
std::vector<MessageItem> forkConversation(const QString &prompt) const;
|
||||||
|
|
||||||
@ -237,11 +234,11 @@ protected:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
const Chat *m_chat;
|
const Chat *m_chat;
|
||||||
LLModelInfo m_llModelInfo;
|
std::unique_ptr<gpt4all::ui::ChatLLMInstance> m_llmInstance;
|
||||||
LLModelTypeV1 m_llModelType = LLModelTypeV1::NONE;
|
|
||||||
ModelInfo m_modelInfo;
|
ModelInfo m_modelInfo;
|
||||||
TokenTimer *m_timer;
|
TokenTimer *m_timer;
|
||||||
QThread m_llmThread;
|
QThread m_llmThread;
|
||||||
|
QNetworkAccessManager m_nam; // TODO(jared): avoid making multiple thread pools
|
||||||
std::atomic<bool> m_stopGenerating;
|
std::atomic<bool> m_stopGenerating;
|
||||||
std::atomic<bool> m_shouldBeLoaded;
|
std::atomic<bool> m_shouldBeLoaded;
|
||||||
std::atomic<bool> m_forceUnloadModel;
|
std::atomic<bool> m_forceUnloadModel;
|
||||||
|
@ -110,10 +110,6 @@ bool EmbeddingLLMWorker::loadModel()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME(jared): the user may want this to take effect without having to restart
|
|
||||||
int n_threads = MySettings::globalInstance()->threadCount();
|
|
||||||
m_model->setThreadCount(n_threads);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
46
gpt4all-chat/src/json-helpers.cpp
Normal file
46
gpt4all-chat/src/json-helpers.cpp
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
#include "json-helpers.h"
|
||||||
|
|
||||||
|
#include <boost/json.hpp> // IWYU pragma: keep
|
||||||
|
#include <boost/system.hpp> // IWYU pragma: keep
|
||||||
|
#include <gpt4all-backend/json-helpers.h>
|
||||||
|
|
||||||
|
#include <QByteArray>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QUuid>
|
||||||
|
#include <QtAssert>
|
||||||
|
|
||||||
|
#include <system_error>
|
||||||
|
|
||||||
|
namespace json = boost::json;
|
||||||
|
namespace sys = boost::system;
|
||||||
|
|
||||||
|
|
||||||
|
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QUuid &uuid)
|
||||||
|
{
|
||||||
|
auto bytes = uuid.toRfc4122().toBase64();
|
||||||
|
value = json::value_from(json::string_view(bytes.data(), bytes.size()));
|
||||||
|
}
|
||||||
|
|
||||||
|
QUuid tag_invoke(const boost::json::value_to_tag<QUuid> &, const boost::json::value &value)
|
||||||
|
{
|
||||||
|
auto &s = value.as_string();
|
||||||
|
auto bytes = QByteArray::fromRawData(s.data(), s.size());
|
||||||
|
auto result = QByteArray::fromBase64Encoding(bytes);
|
||||||
|
if (!result)
|
||||||
|
throw sys::system_error(std::make_error_code(std::errc::invalid_argument), __func__);
|
||||||
|
auto uuid = QUuid::fromRfc4122(result.decoded);
|
||||||
|
Q_ASSERT(!uuid.isNull()); // this may fail if the user manually creates a null UUID
|
||||||
|
return uuid;
|
||||||
|
}
|
||||||
|
|
||||||
|
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QUrl &url)
|
||||||
|
{
|
||||||
|
auto bytes = url.toEncoded();
|
||||||
|
value = json::value_from(json::string_view(bytes.data(), bytes.size()));
|
||||||
|
}
|
||||||
|
|
||||||
|
QUrl tag_invoke(const boost::json::value_to_tag<QUrl> &, const boost::json::value &value)
|
||||||
|
{
|
||||||
|
auto &s = value.as_string();
|
||||||
|
return QUrl::fromEncoded(QByteArray::fromRawData(s.data(), s.size()));
|
||||||
|
}
|
15
gpt4all-chat/src/json-helpers.h
Normal file
15
gpt4all-chat/src/json-helpers.h
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
class QUrl;
|
||||||
|
class QUuid;
|
||||||
|
namespace boost::json {
|
||||||
|
class value;
|
||||||
|
struct value_from_tag;
|
||||||
|
template <typename T> struct value_to_tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QUuid &uuid);
|
||||||
|
QUuid tag_invoke(const boost::json::value_to_tag<QUuid> &, const boost::json::value &value);
|
||||||
|
|
||||||
|
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QUrl &url);
|
||||||
|
QUrl tag_invoke(const boost::json::value_to_tag<QUrl> &, const boost::json::value &value);
|
@ -1,32 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <QStringView>
|
|
||||||
|
|
||||||
class QString;
|
|
||||||
namespace QCoro { template <typename T> class AsyncGenerator; }
|
|
||||||
namespace gpt4all::backend { struct GenerationParams; }
|
|
||||||
|
|
||||||
|
|
||||||
namespace gpt4all::ui {
|
|
||||||
|
|
||||||
|
|
||||||
struct ChatResponseMetadata {
|
|
||||||
int nPromptTokens;
|
|
||||||
int nResponseTokens;
|
|
||||||
};
|
|
||||||
|
|
||||||
// TODO: implement two of these; one based on Ollama (TBD) and the other based on OpenAI (chatapi.h)
|
|
||||||
class ChatLLModel {
|
|
||||||
public:
|
|
||||||
virtual ~ChatLLModel() = 0;
|
|
||||||
|
|
||||||
[[nodiscard]]
|
|
||||||
virtual QString name() = 0;
|
|
||||||
|
|
||||||
virtual void preload() = 0;
|
|
||||||
virtual auto chat(QStringView prompt, const backend::GenerationParams ¶ms,
|
|
||||||
/*out*/ ChatResponseMetadata &metadata) -> QCoro::AsyncGenerator<QString> = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace gpt4all::ui
|
|
@ -1,75 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "chat.h"
|
|
||||||
#include "provider.h"
|
|
||||||
|
|
||||||
#include <QObject>
|
|
||||||
#include <QQmlEngine>
|
|
||||||
#include <QString>
|
|
||||||
#include <QUrl>
|
|
||||||
|
|
||||||
class QNetworkAccessManager;
|
|
||||||
|
|
||||||
|
|
||||||
namespace gpt4all::ui {
|
|
||||||
|
|
||||||
|
|
||||||
class OpenaiModelDescription : public QObject {
|
|
||||||
Q_OBJECT
|
|
||||||
QML_ELEMENT
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit OpenaiModelDescription(OpenaiProvider *provider, QString displayName, QString modelName)
|
|
||||||
: QObject(provider)
|
|
||||||
, m_provider(provider)
|
|
||||||
, m_displayName(std::move(displayName))
|
|
||||||
, m_modelName(std::move(modelName))
|
|
||||||
{}
|
|
||||||
|
|
||||||
// getters
|
|
||||||
[[nodiscard]] OpenaiProvider *provider () const { return m_provider; }
|
|
||||||
[[nodiscard]] const QString &displayName() const { return m_displayName; }
|
|
||||||
[[nodiscard]] const QString &modelName () const { return m_modelName; }
|
|
||||||
|
|
||||||
// setters
|
|
||||||
void setDisplayName(QString value);
|
|
||||||
void setModelName (QString value);
|
|
||||||
|
|
||||||
Q_SIGNALS:
|
|
||||||
void displayNameChanged(const QString &value);
|
|
||||||
void modelNameChanged (const QString &value);
|
|
||||||
|
|
||||||
private:
|
|
||||||
OpenaiProvider *m_provider;
|
|
||||||
QString m_displayName;
|
|
||||||
QString m_modelName;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct OpenaiConnectionDetails {
|
|
||||||
QUrl baseUrl;
|
|
||||||
QString modelName;
|
|
||||||
QString apiKey;
|
|
||||||
|
|
||||||
OpenaiConnectionDetails(const OpenaiModelDescription *desc)
|
|
||||||
: baseUrl(desc->provider()->baseUrl())
|
|
||||||
, apiKey(desc->provider()->apiKey())
|
|
||||||
, modelName(desc->modelName())
|
|
||||||
{}
|
|
||||||
};
|
|
||||||
|
|
||||||
class OpenaiLLModel : public ChatLLModel {
|
|
||||||
public:
|
|
||||||
explicit OpenaiLLModel(OpenaiConnectionDetails connDetails, QNetworkAccessManager *nam);
|
|
||||||
|
|
||||||
void preload() override { /* not supported -> no-op */ }
|
|
||||||
|
|
||||||
auto chat(QStringView prompt, const backend::GenerationParams ¶ms, /*out*/ ChatResponseMetadata &metadata)
|
|
||||||
-> QCoro::AsyncGenerator<QString> override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
OpenaiConnectionDetails m_connDetails;
|
|
||||||
QNetworkAccessManager *m_nam;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace gpt4all::ui
|
|
@ -1,26 +0,0 @@
|
|||||||
#include "provider.h"
|
|
||||||
|
|
||||||
#include <utility>
|
|
||||||
|
|
||||||
|
|
||||||
namespace gpt4all::ui {
|
|
||||||
|
|
||||||
|
|
||||||
void OpenaiProvider::setBaseUrl(QUrl value)
|
|
||||||
{
|
|
||||||
if (m_baseUrl != value) {
|
|
||||||
m_baseUrl = std::move(value);
|
|
||||||
emit baseUrlChanged(m_baseUrl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void OpenaiProvider::setApiKey(QString value)
|
|
||||||
{
|
|
||||||
if (m_apiKey != value) {
|
|
||||||
m_apiKey = std::move(value);
|
|
||||||
emit apiKeyChanged(m_apiKey);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace gpt4all::ui
|
|
@ -1,47 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <QObject>
|
|
||||||
#include <QQmlEngine>
|
|
||||||
#include <QString>
|
|
||||||
#include <QUrl>
|
|
||||||
|
|
||||||
|
|
||||||
namespace gpt4all::ui {
|
|
||||||
|
|
||||||
|
|
||||||
class ModelProvider : public QObject {
|
|
||||||
Q_OBJECT
|
|
||||||
|
|
||||||
Q_PROPERTY(QString name READ name CONSTANT)
|
|
||||||
|
|
||||||
public:
|
|
||||||
[[nodiscard]] virtual QString name() = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
class OpenaiProvider : public ModelProvider {
|
|
||||||
Q_OBJECT
|
|
||||||
QML_ELEMENT
|
|
||||||
|
|
||||||
Q_PROPERTY(QUrl baseUrl READ baseUrl WRITE setBaseUrl NOTIFY baseUrlChanged)
|
|
||||||
Q_PROPERTY(QString apiKey READ apiKey WRITE setApiKey NOTIFY apiKeyChanged)
|
|
||||||
|
|
||||||
public:
|
|
||||||
[[nodiscard]] QString name() override { return m_name; }
|
|
||||||
[[nodiscard]] const QUrl &baseUrl() { return m_baseUrl; }
|
|
||||||
[[nodiscard]] const QString &apiKey () { return m_apiKey; }
|
|
||||||
|
|
||||||
void setBaseUrl(QUrl value);
|
|
||||||
void setApiKey (QString value);
|
|
||||||
|
|
||||||
Q_SIGNALS:
|
|
||||||
void baseUrlChanged(const QUrl &value);
|
|
||||||
void apiKeyChanged (const QString &value);
|
|
||||||
|
|
||||||
private:
|
|
||||||
QString m_name;
|
|
||||||
QUrl m_baseUrl;
|
|
||||||
QString m_apiKey;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace gpt4all::ui
|
|
34
gpt4all-chat/src/llmodel_chat.h
Normal file
34
gpt4all-chat/src/llmodel_chat.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
class QString;
|
||||||
|
class QStringView;
|
||||||
|
namespace QCoro {
|
||||||
|
template <typename T> class AsyncGenerator;
|
||||||
|
template <typename T> class Task;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class GenerationParams;
|
||||||
|
class ModelDescription;
|
||||||
|
|
||||||
|
struct ChatResponseMetadata {
|
||||||
|
int nPromptTokens;
|
||||||
|
int nResponseTokens;
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: implement two of these; one based on Ollama (TBD) and the other based on OpenAI (chatapi.h)
|
||||||
|
class ChatLLMInstance {
|
||||||
|
public:
|
||||||
|
virtual ~ChatLLMInstance() = 0;
|
||||||
|
|
||||||
|
virtual auto description() const -> const ModelDescription * = 0;
|
||||||
|
virtual auto preload() -> QCoro::Task<void> = 0;
|
||||||
|
virtual auto generate(QStringView prompt, const GenerationParams ¶ms, /*out*/ ChatResponseMetadata &metadata)
|
||||||
|
-> QCoro::AsyncGenerator<QString> = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
22
gpt4all-chat/src/llmodel_description.cpp
Normal file
22
gpt4all-chat/src/llmodel_description.cpp
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
#include "llmodel_description.h"
|
||||||
|
|
||||||
|
#include "llmodel_chat.h"
|
||||||
|
#include "llmodel_provider.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
auto ModelDescription::newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<ChatLLMInstance>
|
||||||
|
{ return std::unique_ptr<ChatLLMInstance>(newInstanceImpl(nam)); }
|
||||||
|
|
||||||
|
bool operator==(const ModelDescription &a, const ModelDescription &b)
|
||||||
|
{
|
||||||
|
if (typeid(a) != typeid(b))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return *a.provider() == *b.provider() && a.key() == b.key();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
40
gpt4all-chat/src/llmodel_description.h
Normal file
40
gpt4all-chat/src/llmodel_description.h
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <QObject>
|
||||||
|
#include <QVariant>
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
class QNetworkAccessManager;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class ChatLLMInstance;
|
||||||
|
class ModelProvider;
|
||||||
|
|
||||||
|
// TODO: implement shared_from_this guidance for restricted construction
|
||||||
|
class ModelDescription : public std::enable_shared_from_this<ModelDescription> {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(const ModelProvider *provider READ provider CONSTANT)
|
||||||
|
Q_PROPERTY(QVariant key READ key CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual ~ModelDescription() noexcept = 0;
|
||||||
|
|
||||||
|
// getters
|
||||||
|
[[nodiscard]] virtual auto provider() const -> const ModelProvider * = 0;
|
||||||
|
[[nodiscard]] virtual QVariant key () const = 0;
|
||||||
|
|
||||||
|
/// create an instance to chat with
|
||||||
|
[[nodiscard]] auto newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<ChatLLMInstance>;
|
||||||
|
|
||||||
|
friend bool operator==(const ModelDescription &a, const ModelDescription &b);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
[[nodiscard]] virtual auto newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance * = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
82
gpt4all-chat/src/llmodel_ollama.cpp
Normal file
82
gpt4all-chat/src/llmodel_ollama.cpp
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
#include "llmodel_ollama.h"
|
||||||
|
|
||||||
|
#include <QCoro/QCoroAsyncGenerator>
|
||||||
|
#include <QCoro/QCoroTask>
|
||||||
|
|
||||||
|
using namespace Qt::Literals::StringLiterals;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
void OllamaGenerationParams::parseInner(QMap<GenerationParam, QVariant> &values)
|
||||||
|
{
|
||||||
|
tryParseValue(values, GenerationParam::NPredict, &OllamaGenerationParams::n_predict);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OllamaGenerationParams::toMap() const -> QMap<QLatin1StringView, QVariant>
|
||||||
|
{
|
||||||
|
return {
|
||||||
|
{ "n_predict"_L1, n_predict },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OllamaProvider::supportedGenerationParams() const -> QSet<GenerationParam>
|
||||||
|
{
|
||||||
|
using enum GenerationParam;
|
||||||
|
return { NPredict };
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OllamaProvider::makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const
|
||||||
|
-> OllamaGenerationParams *
|
||||||
|
{ return new OllamaGenerationParams(values); }
|
||||||
|
|
||||||
|
/// load
|
||||||
|
OllamaProviderCustom::OllamaProviderCustom(std::shared_ptr<ProviderStore> store, QUuid id)
|
||||||
|
: ModelProvider(std::move(id))
|
||||||
|
, ModelProviderCustom(std::move(store))
|
||||||
|
{ load(); }
|
||||||
|
|
||||||
|
/// create
|
||||||
|
OllamaProviderCustom::OllamaProviderCustom(std::shared_ptr<ProviderStore> store, QString name, QUrl baseUrl)
|
||||||
|
: ModelProvider(std::move(name), std::move(baseUrl))
|
||||||
|
, ModelProviderCustom(std::move(store))
|
||||||
|
{
|
||||||
|
auto data = m_store->create(m_name, m_baseUrl);
|
||||||
|
if (!data)
|
||||||
|
data.error().raise();
|
||||||
|
m_id = (*data)->id;
|
||||||
|
}
|
||||||
|
|
||||||
|
OllamaModelDescription::OllamaModelDescription(std::shared_ptr<const OllamaProvider> provider, QByteArray modelHash)
|
||||||
|
: m_provider(std::move(provider))
|
||||||
|
, m_modelHash(std::move(modelHash))
|
||||||
|
{}
|
||||||
|
|
||||||
|
auto OllamaModelDescription::newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<OllamaChatModel>
|
||||||
|
{ return std::unique_ptr<OllamaChatModel>(&dynamic_cast<OllamaChatModel &>(*newInstanceImpl(nam))); }
|
||||||
|
|
||||||
|
auto OllamaModelDescription::newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance *
|
||||||
|
{ return new OllamaChatModel({ shared_from_this(), this }, nam); }
|
||||||
|
|
||||||
|
OllamaChatModel::OllamaChatModel(std::shared_ptr<const OllamaModelDescription> description, QNetworkAccessManager *nam)
|
||||||
|
: m_description(std::move(description))
|
||||||
|
, m_nam(nam)
|
||||||
|
{}
|
||||||
|
|
||||||
|
auto OllamaChatModel::preload() -> QCoro::Task<>
|
||||||
|
{
|
||||||
|
// TODO: implement
|
||||||
|
co_return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OllamaChatModel::generate(QStringView prompt, const GenerationParams ¶ms,
|
||||||
|
/*out*/ ChatResponseMetadata &metadata)
|
||||||
|
-> QCoro::AsyncGenerator<QString>
|
||||||
|
{
|
||||||
|
// TODO: implement
|
||||||
|
co_yield QStringLiteral("(TODO: response from ollama)");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
122
gpt4all-chat/src/llmodel_ollama.h
Normal file
122
gpt4all-chat/src/llmodel_ollama.h
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "llmodel_chat.h"
|
||||||
|
#include "llmodel_description.h"
|
||||||
|
#include "llmodel_provider.h"
|
||||||
|
|
||||||
|
#include <QByteArray>
|
||||||
|
#include <QLatin1StringView> // IWYU pragma: keep
|
||||||
|
#include <QObject>
|
||||||
|
#include <QString>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QVariant>
|
||||||
|
#include <QtTypes> // IWYU pragma: keep
|
||||||
|
|
||||||
|
class QNetworkAccessManager;
|
||||||
|
template <typename Key, typename T> class QMap;
|
||||||
|
template <typename T> class QSet;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaChatModel;
|
||||||
|
|
||||||
|
struct OllamaGenerationParamsData {
|
||||||
|
uint n_predict;
|
||||||
|
// TODO(jared): include ollama-specific generation params
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaGenerationParams : public GenerationParams, public OllamaGenerationParamsData {
|
||||||
|
public:
|
||||||
|
explicit OllamaGenerationParams(QMap<GenerationParam, QVariant> values) { parse(std::move(values)); }
|
||||||
|
auto toMap() const -> QMap<QLatin1StringView, QVariant> override;
|
||||||
|
bool isNoop() const override { return !n_predict; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void parseInner(QMap<GenerationParam, QVariant> &values) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaProvider : public QObject, public virtual ModelProvider {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
~OllamaProvider() noexcept override = 0;
|
||||||
|
|
||||||
|
QObject *asQObject() override { return this; }
|
||||||
|
const QObject *asQObject() const override { return this; }
|
||||||
|
|
||||||
|
auto supportedGenerationParams() const -> QSet<GenerationParam> override;
|
||||||
|
auto makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const -> OllamaGenerationParams * override;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaProviderBuiltin : public ModelProviderBuiltin, public OllamaProvider {
|
||||||
|
Q_GADGET
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Create a new built-in Ollama provider (transient).
|
||||||
|
explicit OllamaProviderBuiltin(QUuid id, QString name, QUrl baseUrl)
|
||||||
|
: ModelProvider(std::move(id), std::move(name), std::move(baseUrl)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaProviderCustom final : public OllamaProvider, public ModelProviderCustom {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Load an existing OllamaProvider from disk.
|
||||||
|
explicit OllamaProviderCustom(std::shared_ptr<ProviderStore> store, QUuid id);
|
||||||
|
|
||||||
|
/// Create a new OllamaProvider on disk.
|
||||||
|
explicit OllamaProviderCustom(std::shared_ptr<ProviderStore> store, QString name, QUrl baseUrl);
|
||||||
|
|
||||||
|
Q_SIGNALS:
|
||||||
|
void nameChanged (const QString &value);
|
||||||
|
void baseUrlChanged(const QUrl &value);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
auto asData() -> ModelProviderData override
|
||||||
|
{ return { m_id, ProviderType::ollama, m_name, m_baseUrl, {} }; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaModelDescription : public ModelDescription {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QByteArray modelHash READ modelHash CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit OllamaModelDescription(std::shared_ptr<const OllamaProvider> provider, QByteArray modelHash);
|
||||||
|
|
||||||
|
// getters
|
||||||
|
[[nodiscard]] auto provider () const -> const OllamaProvider * override { return m_provider.get(); }
|
||||||
|
[[nodiscard]] QVariant key () const override { return m_modelHash; }
|
||||||
|
[[nodiscard]] const QByteArray &modelHash() const { return m_modelHash; }
|
||||||
|
|
||||||
|
[[nodiscard]] auto newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<OllamaChatModel>;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
[[nodiscard]] auto newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance * override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const OllamaProvider> m_provider;
|
||||||
|
QByteArray m_modelHash;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaChatModel : public ChatLLMInstance {
|
||||||
|
public:
|
||||||
|
explicit OllamaChatModel(std::shared_ptr<const OllamaModelDescription> description, QNetworkAccessManager *nam);
|
||||||
|
|
||||||
|
auto description() const -> const OllamaModelDescription * override
|
||||||
|
{ return m_description.get(); }
|
||||||
|
|
||||||
|
auto preload() -> QCoro::Task<void> override;
|
||||||
|
|
||||||
|
auto generate(QStringView prompt, const GenerationParams ¶ms, /*out*/ ChatResponseMetadata &metadata)
|
||||||
|
-> QCoro::AsyncGenerator<QString> override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const OllamaModelDescription> m_description;
|
||||||
|
// TODO: implement generate using Ollama backend
|
||||||
|
QNetworkAccessManager *m_nam;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
@ -1,4 +1,4 @@
|
|||||||
#include "openai.h"
|
#include "llmodel_openai.h"
|
||||||
|
|
||||||
#include "mysettings.h"
|
#include "mysettings.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
@ -6,29 +6,34 @@
|
|||||||
#include <QCoro/QCoroAsyncGenerator> // IWYU pragma: keep
|
#include <QCoro/QCoroAsyncGenerator> // IWYU pragma: keep
|
||||||
#include <QCoro/QCoroNetworkReply> // IWYU pragma: keep
|
#include <QCoro/QCoroNetworkReply> // IWYU pragma: keep
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
#include <gpt4all-backend/formatters.h>
|
#include <gpt4all-backend/formatters.h> // IWYU pragma: keep
|
||||||
#include <gpt4all-backend/generation-params.h>
|
|
||||||
#include <gpt4all-backend/rest.h>
|
#include <gpt4all-backend/rest.h>
|
||||||
|
|
||||||
|
#include <QAnyStringView>
|
||||||
#include <QByteArray>
|
#include <QByteArray>
|
||||||
#include <QJsonArray>
|
#include <QJsonArray>
|
||||||
#include <QJsonDocument>
|
#include <QJsonDocument>
|
||||||
#include <QJsonObject>
|
#include <QJsonObject>
|
||||||
#include <QJsonValue>
|
#include <QJsonValue>
|
||||||
#include <QLatin1String>
|
#include <QList>
|
||||||
#include <QNetworkAccessManager>
|
#include <QMap>
|
||||||
|
#include <QMetaEnum>
|
||||||
|
#include <QNetworkReply>
|
||||||
#include <QNetworkRequest>
|
#include <QNetworkRequest>
|
||||||
#include <QRestAccessManager>
|
#include <QRestAccessManager>
|
||||||
#include <QRestReply>
|
#include <QRestReply>
|
||||||
|
#include <QSet>
|
||||||
#include <QStringView>
|
#include <QStringView>
|
||||||
#include <QUrl>
|
|
||||||
#include <QUtf8StringView> // IWYU pragma: keep
|
#include <QUtf8StringView> // IWYU pragma: keep
|
||||||
#include <QVariant>
|
#include <QVariant>
|
||||||
#include <QXmlStreamReader>
|
#include <QXmlStreamReader>
|
||||||
#include <Qt>
|
#include <QtAssert>
|
||||||
|
|
||||||
|
#include <coroutine>
|
||||||
#include <expected>
|
#include <expected>
|
||||||
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <stdexcept>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
using namespace Qt::Literals::StringLiterals;
|
using namespace Qt::Literals::StringLiterals;
|
||||||
@ -63,24 +68,72 @@ static auto processRespLine(const QByteArray &line) -> std::optional<QString>
|
|||||||
namespace gpt4all::ui {
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
void OpenaiModelDescription::setDisplayName(QString value)
|
void OpenaiGenerationParams::parseInner(QMap<GenerationParam, QVariant> &values)
|
||||||
{
|
{
|
||||||
if (m_displayName != value) {
|
tryParseValue(values, GenerationParam::NPredict, &OpenaiGenerationParams::n_predict );
|
||||||
m_displayName = std::move(value);
|
tryParseValue(values, GenerationParam::Temperature, &OpenaiGenerationParams::temperature);
|
||||||
emit displayNameChanged(m_displayName);
|
tryParseValue(values, GenerationParam::TopP, &OpenaiGenerationParams::top_p );
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenaiModelDescription::setModelName(QString value)
|
auto OpenaiGenerationParams::toMap() const -> QMap<QLatin1StringView, QVariant>
|
||||||
{
|
{
|
||||||
if (m_modelName != value) {
|
return {
|
||||||
m_modelName = std::move(value);
|
{ "max_completion_tokens"_L1, n_predict },
|
||||||
emit modelNameChanged(m_modelName);
|
{ "temperature"_L1, temperature },
|
||||||
}
|
{ "top_p"_L1, top_p },
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
OpenaiLLModel::OpenaiLLModel(OpenaiConnectionDetails connDetails, QNetworkAccessManager *nam)
|
auto OpenaiProvider::supportedGenerationParams() const -> QSet<GenerationParam>
|
||||||
: m_connDetails(std::move(connDetails))
|
{
|
||||||
|
using enum GenerationParam;
|
||||||
|
return { NPredict, Temperature, TopP };
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OpenaiProvider::makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const
|
||||||
|
-> OpenaiGenerationParams *
|
||||||
|
{ return new OpenaiGenerationParams(values); }
|
||||||
|
|
||||||
|
OpenaiProviderBuiltin::OpenaiProviderBuiltin(QUuid id, QString name, QUrl baseUrl, QString apiKey)
|
||||||
|
: ModelProvider(std::move(id), std::move(name), std::move(baseUrl))
|
||||||
|
, OpenaiProvider(std::move(apiKey))
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// load
|
||||||
|
OpenaiProviderCustom::OpenaiProviderCustom(std::shared_ptr<ProviderStore> store, QUuid id)
|
||||||
|
: ModelProvider(std::move(id))
|
||||||
|
, ModelProviderCustom(std::move(store))
|
||||||
|
{
|
||||||
|
auto &details = load();
|
||||||
|
m_apiKey = std::get<OpenaiProviderDetails>(details).api_key;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// create
|
||||||
|
OpenaiProviderCustom::OpenaiProviderCustom(std::shared_ptr<ProviderStore> store, QString name, QUrl baseUrl,
|
||||||
|
QString apiKey)
|
||||||
|
: ModelProvider(std::move(name), std::move(baseUrl))
|
||||||
|
, ModelProviderCustom(std::move(store))
|
||||||
|
, OpenaiProvider(std::move(apiKey))
|
||||||
|
{
|
||||||
|
auto data = m_store->create(m_name, m_baseUrl, m_apiKey);
|
||||||
|
if (!data)
|
||||||
|
data.error().raise();
|
||||||
|
m_id = (*data)->id;
|
||||||
|
}
|
||||||
|
|
||||||
|
OpenaiModelDescription::OpenaiModelDescription(std::shared_ptr<const OpenaiProvider> provider, QString modelName)
|
||||||
|
: m_provider(std::move(provider))
|
||||||
|
, m_modelName(std::move(modelName))
|
||||||
|
{}
|
||||||
|
|
||||||
|
auto OpenaiModelDescription::newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<OpenaiChatModel>
|
||||||
|
{ return std::unique_ptr<OpenaiChatModel>(&dynamic_cast<OpenaiChatModel &>(*newInstanceImpl(nam))); }
|
||||||
|
|
||||||
|
auto OpenaiModelDescription::newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance *
|
||||||
|
{ return new OpenaiChatModel({ shared_from_this(), this }, nam); }
|
||||||
|
|
||||||
|
OpenaiChatModel::OpenaiChatModel(std::shared_ptr<const OpenaiModelDescription> description, QNetworkAccessManager *nam)
|
||||||
|
: m_description(std::move(description))
|
||||||
, m_nam(nam)
|
, m_nam(nam)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
@ -159,21 +212,22 @@ static auto parsePrompt(QXmlStreamReader &xml) -> std::expected<QJsonArray, QStr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto OpenaiLLModel::chat(QStringView prompt, const backend::GenerationParams ¶ms,
|
auto preload() -> QCoro::Task<>
|
||||||
/*out*/ ChatResponseMetadata &metadata) -> QCoro::AsyncGenerator<QString>
|
{ co_return; /* not supported -> no-op */ }
|
||||||
|
|
||||||
|
auto OpenaiChatModel::generate(QStringView prompt, const GenerationParams ¶ms,
|
||||||
|
/*out*/ ChatResponseMetadata &metadata) -> QCoro::AsyncGenerator<QString>
|
||||||
{
|
{
|
||||||
auto *mySettings = MySettings::globalInstance();
|
auto *mySettings = MySettings::globalInstance();
|
||||||
|
|
||||||
if (!params.n_predict)
|
if (params.isNoop())
|
||||||
co_return; // nothing requested
|
co_return; // nothing requested
|
||||||
|
|
||||||
auto reqBody = makeJsonObject({
|
auto reqBody = makeJsonObject({
|
||||||
{ "model"_L1, m_connDetails.modelName },
|
{ "model"_L1, m_description->modelName() },
|
||||||
{ "max_completion_tokens"_L1, qint64(params.n_predict) },
|
{ "stream"_L1, true },
|
||||||
{ "stream"_L1, true },
|
|
||||||
{ "temperature"_L1, params.temperature },
|
|
||||||
{ "top_p"_L1, params.top_p },
|
|
||||||
});
|
});
|
||||||
|
extend(reqBody, params.toMap());
|
||||||
|
|
||||||
// conversation history
|
// conversation history
|
||||||
{
|
{
|
||||||
@ -184,9 +238,10 @@ auto OpenaiLLModel::chat(QStringView prompt, const backend::GenerationParams &pa
|
|||||||
reqBody.insert("messages"_L1, *messages);
|
reqBody.insert("messages"_L1, *messages);
|
||||||
}
|
}
|
||||||
|
|
||||||
QNetworkRequest request(m_connDetails.baseUrl.resolved(QUrl("/v1/chat/completions")));
|
auto &provider = *m_description->provider();
|
||||||
|
QNetworkRequest request(provider.baseUrl().resolved(QUrl("/v1/chat/completions")));
|
||||||
request.setHeader(QNetworkRequest::UserAgentHeader, mySettings->userAgent());
|
request.setHeader(QNetworkRequest::UserAgentHeader, mySettings->userAgent());
|
||||||
request.setRawHeader("authorization", u"Bearer %1"_s.arg(m_connDetails.apiKey).toUtf8());
|
request.setRawHeader("authorization", u"Bearer %1"_s.arg(provider.apiKey()).toUtf8());
|
||||||
|
|
||||||
QRestAccessManager restNam(m_nam);
|
QRestAccessManager restNam(m_nam);
|
||||||
std::unique_ptr<QNetworkReply> reply(restNam.post(request, QJsonDocument(reqBody)));
|
std::unique_ptr<QNetworkReply> reply(restNam.post(request, QJsonDocument(reqBody)));
|
139
gpt4all-chat/src/llmodel_openai.h
Normal file
139
gpt4all-chat/src/llmodel_openai.h
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "llmodel_chat.h"
|
||||||
|
#include "llmodel_description.h"
|
||||||
|
#include "llmodel_provider.h"
|
||||||
|
|
||||||
|
#include <QLatin1StringView> // IWYU pragma: keep
|
||||||
|
#include <QObject> // IWYU pragma: keep
|
||||||
|
#include <QString>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QVariant>
|
||||||
|
#include <QtTypes> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
class QNetworkAccessManager;
|
||||||
|
template <typename Key, typename T> class QMap;
|
||||||
|
template <typename T> class QSet;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class OpenaiChatModel;
|
||||||
|
|
||||||
|
struct OpenaiGenerationParamsData {
|
||||||
|
uint n_predict;
|
||||||
|
float temperature;
|
||||||
|
float top_p;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiGenerationParams : public GenerationParams, public OpenaiGenerationParamsData {
|
||||||
|
public:
|
||||||
|
explicit OpenaiGenerationParams(QMap<GenerationParam, QVariant> values) { parse(std::move(values)); }
|
||||||
|
auto toMap() const -> QMap<QLatin1StringView, QVariant> override;
|
||||||
|
bool isNoop() const override { return !n_predict; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void parseInner(QMap<GenerationParam, QVariant> &values) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiProvider : public QObject, public virtual ModelProvider {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit OpenaiProvider() = default; // custom
|
||||||
|
explicit OpenaiProvider(QString apiKey) // built-in
|
||||||
|
: m_apiKey(std::move(apiKey))
|
||||||
|
{}
|
||||||
|
public:
|
||||||
|
~OpenaiProvider() noexcept override = 0;
|
||||||
|
|
||||||
|
QObject *asQObject() override { return this; }
|
||||||
|
const QObject *asQObject() const override { return this; }
|
||||||
|
|
||||||
|
[[nodiscard]] const QString &apiKey() const { return m_apiKey; }
|
||||||
|
|
||||||
|
auto supportedGenerationParams() const -> QSet<GenerationParam> override;
|
||||||
|
auto makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const -> OpenaiGenerationParams * override;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
QString m_apiKey;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiProviderBuiltin : public ModelProviderBuiltin, public OpenaiProvider {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QString apiKey READ apiKey CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Create a new built-in OpenAI provider (transient).
|
||||||
|
explicit OpenaiProviderBuiltin(QUuid id, QString name, QUrl baseUrl, QString apiKey);
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiProviderCustom final : public OpenaiProvider, public ModelProviderCustom {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
Q_PROPERTY(QString apiKey READ apiKey WRITE setApiKey NOTIFY apiKeyChanged)
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Load an existing OpenaiProvider from disk.
|
||||||
|
explicit OpenaiProviderCustom(std::shared_ptr<ProviderStore> store, QUuid id);
|
||||||
|
|
||||||
|
/// Create a new OpenaiProvider on disk.
|
||||||
|
explicit OpenaiProviderCustom(std::shared_ptr<ProviderStore> store, QString name, QUrl baseUrl, QString apiKey);
|
||||||
|
|
||||||
|
void setApiKey(QString value) { setMemberProp<QString>(&OpenaiProviderCustom::m_apiKey, "apiKey", std::move(value)); }
|
||||||
|
|
||||||
|
Q_SIGNALS:
|
||||||
|
void nameChanged (const QString &value);
|
||||||
|
void baseUrlChanged(const QUrl &value);
|
||||||
|
void apiKeyChanged (const QString &value);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
auto asData() -> ModelProviderData override
|
||||||
|
{ return { m_id, ProviderType::openai, m_name, m_baseUrl, OpenaiProviderDetails { m_apiKey } }; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiModelDescription : public ModelDescription {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QString modelName READ modelName CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit OpenaiModelDescription(std::shared_ptr<const OpenaiProvider> provider, QString modelName);
|
||||||
|
|
||||||
|
// getters
|
||||||
|
[[nodiscard]] auto provider () const -> const OpenaiProvider * override { return m_provider.get(); }
|
||||||
|
[[nodiscard]] QVariant key () const override { return m_modelName; }
|
||||||
|
[[nodiscard]] const QString &modelName() const { return m_modelName; }
|
||||||
|
|
||||||
|
[[nodiscard]] auto newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<OpenaiChatModel>;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
[[nodiscard]] auto newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance * override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const OpenaiProvider> m_provider;
|
||||||
|
QString m_modelName;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiChatModel : public ChatLLMInstance {
|
||||||
|
public:
|
||||||
|
explicit OpenaiChatModel(std::shared_ptr<const OpenaiModelDescription> description, QNetworkAccessManager *nam);
|
||||||
|
|
||||||
|
auto description() const -> const OpenaiModelDescription * override
|
||||||
|
{ return m_description.get(); }
|
||||||
|
|
||||||
|
auto preload() -> QCoro::Task<void> override;
|
||||||
|
|
||||||
|
auto generate(QStringView prompt, const GenerationParams ¶ms, /*out*/ ChatResponseMetadata &metadata)
|
||||||
|
-> QCoro::AsyncGenerator<QString> override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const OpenaiModelDescription> m_description;
|
||||||
|
QNetworkAccessManager *m_nam;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
139
gpt4all-chat/src/llmodel_provider.cpp
Normal file
139
gpt4all-chat/src/llmodel_provider.cpp
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
#include "llmodel_provider.h"
|
||||||
|
|
||||||
|
#include "mysettings.h"
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
|
#include <gpt4all-backend/formatters.h> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <QModelIndex> // IWYU pragma: keep
|
||||||
|
#include <QVariant>
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
void GenerationParams::parse(QMap<GenerationParam, QVariant> values)
|
||||||
|
{
|
||||||
|
parseInner(values);
|
||||||
|
if (!values.isEmpty()) {
|
||||||
|
auto gparamsMeta = QMetaEnum::fromType<GenerationParam>();
|
||||||
|
throw std::invalid_argument(fmt::format(
|
||||||
|
" unsupported param: {}", gparamsMeta.valueToKey(int(values.keys().constFirst()))
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
QVariant GenerationParams::tryParseValue(QMap<GenerationParam, QVariant> &values, GenerationParam key,
|
||||||
|
const QMetaType &type)
|
||||||
|
{
|
||||||
|
auto value = values.take(key);
|
||||||
|
if (value.isValid() && !value.canConvert(type)) {
|
||||||
|
auto gparamsMeta = QMetaEnum::fromType<GenerationParam>();
|
||||||
|
throw std::invalid_argument(fmt::format(
|
||||||
|
"expected {} of type {}, got {}", gparamsMeta.valueToKey(int(key)), type.name(), value.typeName()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
ModelProviderCustom::~ModelProviderCustom() noexcept
|
||||||
|
{
|
||||||
|
if (auto res = m_store->release(m_id); !res)
|
||||||
|
res.error().raise(); // should not happen - will terminate program
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ModelProviderCustom::load() -> const ModelProviderData::Details &
|
||||||
|
{
|
||||||
|
auto data = m_store->acquire(m_id);
|
||||||
|
if (!data)
|
||||||
|
data.error().raise();
|
||||||
|
m_name = (*data)->name;
|
||||||
|
m_baseUrl = (*data)->base_url;
|
||||||
|
return (*data)->details;
|
||||||
|
}
|
||||||
|
|
||||||
|
ProviderRegistry::ProviderRegistry(fs::path path)
|
||||||
|
: m_store(std::move(path))
|
||||||
|
{
|
||||||
|
auto *mysettings = MySettings::globalInstance();
|
||||||
|
connect(mysettings, &MySettings::modelPathChanged, this, &ProviderRegistry::onModelPathChanged);
|
||||||
|
}
|
||||||
|
|
||||||
|
Q_INVOKABLE void ProviderRegistry::registerBuiltinProvider(ModelProviderBuiltin *provider)
|
||||||
|
{
|
||||||
|
auto [_, unique] = m_providers.emplace(provider->id(), provider->asQObject());
|
||||||
|
if (!unique)
|
||||||
|
qWarning() << "ignoring duplicate provider:" << provider->id();
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]]
|
||||||
|
bool ProviderRegistry::registerCustomProvider(std::unique_ptr<ModelProviderCustom> provider)
|
||||||
|
{
|
||||||
|
auto [_, unique] = m_providers.emplace(provider->id(), provider->asQObject());
|
||||||
|
if (unique) {
|
||||||
|
m_customProviders.push_back(std::move(provider));
|
||||||
|
emit customProviderAdded(m_customProviders.size() - 1);
|
||||||
|
}
|
||||||
|
return unique;
|
||||||
|
}
|
||||||
|
|
||||||
|
fs::path ProviderRegistry::getSubdir()
|
||||||
|
{
|
||||||
|
auto *mysettings = MySettings::globalInstance();
|
||||||
|
return toFSPath(mysettings->modelPath()) / "providers";
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProviderRegistry::onModelPathChanged()
|
||||||
|
{
|
||||||
|
auto path = getSubdir();
|
||||||
|
if (path != m_store.path()) {
|
||||||
|
emit aboutToBeCleared();
|
||||||
|
m_customProviders.clear(); // delete custom providers to release store locks
|
||||||
|
if (auto res = m_store.setPath(path); !res)
|
||||||
|
res.error().raise(); // should not happen
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CustomProviderList::CustomProviderList(QPointer<ProviderRegistry> registry)
|
||||||
|
: m_registry(std::move(registry))
|
||||||
|
, m_size(m_registry->customProviderCount())
|
||||||
|
{
|
||||||
|
connect(m_registry, &ProviderRegistry::customProviderAdded, this, &CustomProviderList::onCustomProviderAdded);
|
||||||
|
connect(m_registry, &ProviderRegistry::aboutToBeCleared, this, &CustomProviderList::onAboutToBeCleared,
|
||||||
|
Qt::DirectConnection);
|
||||||
|
}
|
||||||
|
|
||||||
|
QVariant CustomProviderList::data(const QModelIndex &index, int role) const
|
||||||
|
{
|
||||||
|
if (index.isValid() && index.row() < rowCount() && role == Qt::DisplayRole)
|
||||||
|
return QVariant::fromValue(m_registry->customProviderAt(index.row()));
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
void CustomProviderList::onCustomProviderAdded(size_t index)
|
||||||
|
{
|
||||||
|
beginInsertRows({}, m_size, m_size);
|
||||||
|
m_size++;
|
||||||
|
endInsertRows();
|
||||||
|
}
|
||||||
|
|
||||||
|
void CustomProviderList::onAboutToBeCleared()
|
||||||
|
{
|
||||||
|
beginResetModel();
|
||||||
|
m_size = 0;
|
||||||
|
endResetModel();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CustomProviderListSort::lessThan(const QModelIndex &left, const QModelIndex &right) const
|
||||||
|
{
|
||||||
|
auto *leftData = sourceModel()->data(left ).value<ModelProviderCustom *>();
|
||||||
|
auto *rightData = sourceModel()->data(right).value<ModelProviderCustom *>();
|
||||||
|
if (leftData && rightData)
|
||||||
|
return QString::localeAwareCompare(leftData->name(), rightData->name()) < 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
199
gpt4all-chat/src/llmodel_provider.h
Normal file
199
gpt4all-chat/src/llmodel_provider.h
Normal file
@ -0,0 +1,199 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "store_provider.h"
|
||||||
|
|
||||||
|
#include "utils.h" // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <QAbstractListModel>
|
||||||
|
#include <QObject>
|
||||||
|
#include <QPointer>
|
||||||
|
#include <QQmlEngine> // IWYU pragma: keep
|
||||||
|
#include <QSortFilterProxyModel>
|
||||||
|
#include <QString>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QUuid>
|
||||||
|
#include <QtPreprocessorSupport>
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <memory>
|
||||||
|
#include <string_view>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
class QJSEngine;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
Q_NAMESPACE
|
||||||
|
|
||||||
|
enum class GenerationParam {
|
||||||
|
NPredict,
|
||||||
|
Temperature,
|
||||||
|
TopP,
|
||||||
|
TopK,
|
||||||
|
MinP,
|
||||||
|
RepeatPenalty,
|
||||||
|
RepeatLastN,
|
||||||
|
};
|
||||||
|
Q_ENUM_NS(GenerationParam)
|
||||||
|
|
||||||
|
class GenerationParams {
|
||||||
|
public:
|
||||||
|
virtual ~GenerationParams() noexcept = 0;
|
||||||
|
|
||||||
|
virtual QMap<QLatin1StringView, QVariant> toMap() const = 0;
|
||||||
|
virtual bool isNoop() const = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void parse(QMap<GenerationParam, QVariant> values);
|
||||||
|
virtual void parseInner(QMap<GenerationParam, QVariant> &values) = 0;
|
||||||
|
|
||||||
|
static QVariant tryParseValue(QMap<GenerationParam, QVariant> &values, GenerationParam key, const QMetaType &type);
|
||||||
|
|
||||||
|
template <typename T, typename S, typename C>
|
||||||
|
void tryParseValue(this S &self, QMap<GenerationParam, QVariant> &values, GenerationParam key, T C::* dest);
|
||||||
|
};
|
||||||
|
|
||||||
|
class ModelProvider {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QUuid id READ id CONSTANT)
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit ModelProvider(QUuid id) // load
|
||||||
|
: m_id(std::move(id)) {}
|
||||||
|
explicit ModelProvider(QUuid id, QString name, QUrl baseUrl) // create built-in
|
||||||
|
: m_id(std::move(id)), m_name(std::move(name)), m_baseUrl(std::move(baseUrl)) {}
|
||||||
|
explicit ModelProvider(QString name, QUrl baseUrl) // create custom
|
||||||
|
: m_name(std::move(name)), m_baseUrl(std::move(baseUrl)) {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual ~ModelProvider() noexcept = 0;
|
||||||
|
|
||||||
|
virtual QObject *asQObject() = 0;
|
||||||
|
virtual const QObject *asQObject() const = 0;
|
||||||
|
|
||||||
|
// getters
|
||||||
|
[[nodiscard]] const QUuid &id () const { return m_id; }
|
||||||
|
[[nodiscard]] const QString &name () const { return m_name; }
|
||||||
|
[[nodiscard]] const QUrl &baseUrl() const { return m_baseUrl; }
|
||||||
|
|
||||||
|
virtual auto supportedGenerationParams() const -> QSet<GenerationParam> = 0;
|
||||||
|
virtual auto makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const -> GenerationParams * = 0;
|
||||||
|
|
||||||
|
friend bool operator==(const ModelProvider &a, const ModelProvider &b)
|
||||||
|
{ return a.m_id == b.m_id; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
QUuid m_id;
|
||||||
|
QString m_name;
|
||||||
|
QUrl m_baseUrl;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ModelProviderBuiltin : public virtual ModelProvider {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QString name READ name CONSTANT)
|
||||||
|
Q_PROPERTY(QUrl baseUrl READ baseUrl CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
~ModelProviderBuiltin() noexcept override = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ModelProviderCustom : public virtual ModelProvider {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QString name READ name WRITE setName NOTIFY nameChanged )
|
||||||
|
Q_PROPERTY(QUrl baseUrl READ baseUrl WRITE setBaseUrl NOTIFY baseUrlChanged)
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit ModelProviderCustom(std::shared_ptr<ProviderStore> store)
|
||||||
|
: m_store(std::move(store)) {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
~ModelProviderCustom() noexcept override;
|
||||||
|
|
||||||
|
// setters
|
||||||
|
void setName (QString value) { setMemberProp<QString>(&ModelProviderCustom::m_name, "name", std::move(value)); }
|
||||||
|
void setBaseUrl(QUrl value) { setMemberProp<QUrl >(&ModelProviderCustom::m_baseUrl, "baseUrl", std::move(value)); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual auto load() -> const ModelProviderData::Details &;
|
||||||
|
virtual auto asData() -> ModelProviderData = 0;
|
||||||
|
|
||||||
|
template <typename T, typename S, typename C>
|
||||||
|
void setMemberProp(this S &self, T C::* member, std::string_view name, T value);
|
||||||
|
|
||||||
|
std::shared_ptr<ProviderStore> m_store;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ProviderRegistry : public QObject {
|
||||||
|
Q_OBJECT
|
||||||
|
QML_ELEMENT
|
||||||
|
QML_SINGLETON
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit ProviderRegistry(std::filesystem::path path);
|
||||||
|
|
||||||
|
public:
|
||||||
|
static ProviderRegistry *create(QQmlEngine *, QJSEngine *) { return new ProviderRegistry(getSubdir()); }
|
||||||
|
Q_INVOKABLE void registerBuiltinProvider(ModelProviderBuiltin *provider);
|
||||||
|
[[nodiscard]] bool registerCustomProvider (std::unique_ptr<ModelProviderCustom> provider);
|
||||||
|
|
||||||
|
size_t customProviderCount() const
|
||||||
|
{ return m_customProviders.size(); }
|
||||||
|
auto customProviderAt(size_t i) const -> const ModelProviderCustom *
|
||||||
|
{ return m_customProviders.at(i).get(); }
|
||||||
|
auto operator[](const QUuid &id) -> ModelProviderCustom *
|
||||||
|
{ return &dynamic_cast<ModelProviderCustom &>(*m_providers.at(id)); }
|
||||||
|
|
||||||
|
Q_SIGNALS:
|
||||||
|
void customProviderAdded(size_t index);
|
||||||
|
void aboutToBeCleared();
|
||||||
|
|
||||||
|
private:
|
||||||
|
static auto getSubdir() -> std::filesystem::path;
|
||||||
|
|
||||||
|
private Q_SLOTS:
|
||||||
|
void onModelPathChanged();
|
||||||
|
|
||||||
|
private:
|
||||||
|
ProviderStore m_store;
|
||||||
|
std::unordered_map<QUuid, QPointer<QObject>> m_providers;
|
||||||
|
std::vector<std::unique_ptr<ModelProviderCustom>> m_customProviders;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CustomProviderList : public QAbstractListModel {
|
||||||
|
Q_OBJECT
|
||||||
|
QML_ELEMENT
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit CustomProviderList(QPointer<ProviderRegistry> registry);
|
||||||
|
|
||||||
|
public:
|
||||||
|
int rowCount(const QModelIndex &parent = {}) const override
|
||||||
|
{ Q_UNUSED(parent) return int(m_size); }
|
||||||
|
QVariant data(const QModelIndex &index, int role) const override;
|
||||||
|
|
||||||
|
private Q_SLOTS:
|
||||||
|
void onCustomProviderAdded(size_t index);
|
||||||
|
void onAboutToBeCleared();
|
||||||
|
|
||||||
|
private:
|
||||||
|
QPointer<ProviderRegistry> m_registry;
|
||||||
|
size_t m_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CustomProviderListSort : public QSortFilterProxyModel {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
protected:
|
||||||
|
bool lessThan(const QModelIndex &left, const QModelIndex &right) const override;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
||||||
|
|
||||||
|
|
||||||
|
#include "llmodel_provider.inl" // IWYU pragma: export
|
30
gpt4all-chat/src/llmodel_provider.inl
Normal file
30
gpt4all-chat/src/llmodel_provider.inl
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T, typename S, typename C>
|
||||||
|
void GenerationParams::tryParseValue(this S &self, QMap<GenerationParam, QVariant> &values, GenerationParam key,
|
||||||
|
T C::* dest)
|
||||||
|
{
|
||||||
|
if (auto value = tryParseValue(values, key, QMetaType::fromType<T>()); value.isValid())
|
||||||
|
self.*dest = value.template value<T>();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename S, typename C>
|
||||||
|
void ModelProviderCustom::setMemberProp(this S &self, T C::* member, std::string_view name, T value)
|
||||||
|
{
|
||||||
|
auto &mpc = static_cast<ModelProviderCustom &>(self);
|
||||||
|
auto &cur = self.*member;
|
||||||
|
if (cur != value) {
|
||||||
|
cur = std::move(value);
|
||||||
|
auto data = mpc.asData();
|
||||||
|
if (auto res = mpc.m_store->setData(std::move(data)); !res)
|
||||||
|
res.error().raise();
|
||||||
|
QMetaObject::invokeMethod(self.asQObject(), fmt::format("{}Changed", name).c_str(), cur);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
@ -16,6 +16,7 @@
|
|||||||
#include <QSortFilterProxyModel>
|
#include <QSortFilterProxyModel>
|
||||||
#include <QSslError>
|
#include <QSslError>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
|
#include <QUuid>
|
||||||
#include <QVariant>
|
#include <QVariant>
|
||||||
#include <QVector> // IWYU pragma: keep
|
#include <QVector> // IWYU pragma: keep
|
||||||
#include <Qt>
|
#include <Qt>
|
||||||
@ -27,6 +28,7 @@
|
|||||||
// IWYU pragma: no_forward_declare QObject
|
// IWYU pragma: no_forward_declare QObject
|
||||||
// IWYU pragma: no_forward_declare QSslError
|
// IWYU pragma: no_forward_declare QSslError
|
||||||
class QUrl;
|
class QUrl;
|
||||||
|
namespace gpt4all::ui { class ModelDescription; }
|
||||||
|
|
||||||
using namespace Qt::Literals::StringLiterals;
|
using namespace Qt::Literals::StringLiterals;
|
||||||
|
|
||||||
@ -75,6 +77,7 @@ private:
|
|||||||
struct ModelInfo {
|
struct ModelInfo {
|
||||||
Q_GADGET
|
Q_GADGET
|
||||||
Q_PROPERTY(QString id READ id WRITE setId)
|
Q_PROPERTY(QString id READ id WRITE setId)
|
||||||
|
Q_PROPERTY(const ModelDescription *modelDesc READ modelDescQt WRITE setModelDescQt)
|
||||||
Q_PROPERTY(QString name READ name WRITE setName)
|
Q_PROPERTY(QString name READ name WRITE setName)
|
||||||
Q_PROPERTY(QString filename READ filename WRITE setFilename)
|
Q_PROPERTY(QString filename READ filename WRITE setFilename)
|
||||||
Q_PROPERTY(QString dirpath MEMBER dirpath)
|
Q_PROPERTY(QString dirpath MEMBER dirpath)
|
||||||
@ -137,6 +140,13 @@ public:
|
|||||||
QString id() const;
|
QString id() const;
|
||||||
void setId(const QString &id);
|
void setId(const QString &id);
|
||||||
|
|
||||||
|
auto modelDesc() const -> const std::shared_ptr<const gpt4all::ui::ModelDescription> &;
|
||||||
|
auto modelDescQt() const -> const gpt4all::ui::ModelDescription *
|
||||||
|
{ return modelDesc().get(); }
|
||||||
|
|
||||||
|
void setModelDesc(std::shared_ptr<const gpt4all::ui::ModelDescription> value);
|
||||||
|
void setModelDescQt(const gpt4all::ui::ModelDescription *); // TODO: implement
|
||||||
|
|
||||||
QString name() const;
|
QString name() const;
|
||||||
void setName(const QString &name);
|
void setName(const QString &name);
|
||||||
|
|
||||||
@ -247,6 +257,7 @@ private:
|
|||||||
QVariant getField(QLatin1StringView name) const;
|
QVariant getField(QLatin1StringView name) const;
|
||||||
|
|
||||||
QString m_id;
|
QString m_id;
|
||||||
|
std::shared_ptr<const gpt4all::ui::ModelDescription> m_modelDesc;
|
||||||
QString m_name;
|
QString m_name;
|
||||||
QString m_filename;
|
QString m_filename;
|
||||||
QString m_description;
|
QString m_description;
|
||||||
|
@ -48,7 +48,6 @@ namespace ModelSettingsKey { namespace {
|
|||||||
|
|
||||||
namespace defaults {
|
namespace defaults {
|
||||||
|
|
||||||
static const int threadCount = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
|
||||||
static const bool networkIsActive = false;
|
static const bool networkIsActive = false;
|
||||||
static const bool networkUsageStatsActive = false;
|
static const bool networkUsageStatsActive = false;
|
||||||
static const QString device = "Auto";
|
static const QString device = "Auto";
|
||||||
@ -254,7 +253,6 @@ void MySettings::restoreApplicationDefaults()
|
|||||||
setChatTheme(basicDefaults.value("chatTheme").value<ChatTheme>());
|
setChatTheme(basicDefaults.value("chatTheme").value<ChatTheme>());
|
||||||
setFontSize(basicDefaults.value("fontSize").value<FontSize>());
|
setFontSize(basicDefaults.value("fontSize").value<FontSize>());
|
||||||
setDevice(defaults::device);
|
setDevice(defaults::device);
|
||||||
setThreadCount(defaults::threadCount);
|
|
||||||
setSystemTray(basicDefaults.value("systemTray").toBool());
|
setSystemTray(basicDefaults.value("systemTray").toBool());
|
||||||
setServerChat(basicDefaults.value("serverChat").toBool());
|
setServerChat(basicDefaults.value("serverChat").toBool());
|
||||||
setNetworkPort(basicDefaults.value("networkPort").toInt());
|
setNetworkPort(basicDefaults.value("networkPort").toInt());
|
||||||
@ -596,29 +594,6 @@ void MySettings::setModelSuggestedFollowUpPrompt(const ModelInfo &info, const QS
|
|||||||
setModelSetting("suggestedFollowUpPrompt", info, value, force, true);
|
setModelSetting("suggestedFollowUpPrompt", info, value, force, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
int MySettings::threadCount() const
|
|
||||||
{
|
|
||||||
int c = m_settings.value("threadCount", defaults::threadCount).toInt();
|
|
||||||
// The old thread setting likely left many people with 0 in settings config file, which means
|
|
||||||
// we should reset it to the default going forward
|
|
||||||
if (c <= 0)
|
|
||||||
c = defaults::threadCount;
|
|
||||||
c = std::max(c, 1);
|
|
||||||
c = std::min(c, QThread::idealThreadCount());
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MySettings::setThreadCount(int value)
|
|
||||||
{
|
|
||||||
if (threadCount() == value)
|
|
||||||
return;
|
|
||||||
|
|
||||||
value = std::max(value, 1);
|
|
||||||
value = std::min(value, QThread::idealThreadCount());
|
|
||||||
m_settings.setValue("threadCount", value);
|
|
||||||
emit threadCountChanged();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool MySettings::systemTray() const { return getBasicSetting("systemTray" ).toBool(); }
|
bool MySettings::systemTray() const { return getBasicSetting("systemTray" ).toBool(); }
|
||||||
bool MySettings::serverChat() const { return getBasicSetting("serverChat" ).toBool(); }
|
bool MySettings::serverChat() const { return getBasicSetting("serverChat" ).toBool(); }
|
||||||
int MySettings::networkPort() const { return getBasicSetting("networkPort" ).toInt(); }
|
int MySettings::networkPort() const { return getBasicSetting("networkPort" ).toInt(); }
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
// IWYU pragma: no_forward_declare QModelIndex
|
// IWYU pragma: no_forward_declare QModelIndex
|
||||||
class QLocale;
|
class QLocale;
|
||||||
|
namespace gpt4all::ui { class GenerationParams; }
|
||||||
|
|
||||||
|
|
||||||
namespace MySettingsEnums {
|
namespace MySettingsEnums {
|
||||||
@ -54,7 +55,6 @@ using namespace MySettingsEnums;
|
|||||||
class MySettings : public QObject
|
class MySettings : public QObject
|
||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
Q_PROPERTY(int threadCount READ threadCount WRITE setThreadCount NOTIFY threadCountChanged)
|
|
||||||
Q_PROPERTY(bool systemTray READ systemTray WRITE setSystemTray NOTIFY systemTrayChanged)
|
Q_PROPERTY(bool systemTray READ systemTray WRITE setSystemTray NOTIFY systemTrayChanged)
|
||||||
Q_PROPERTY(bool serverChat READ serverChat WRITE setServerChat NOTIFY serverChatChanged)
|
Q_PROPERTY(bool serverChat READ serverChat WRITE setServerChat NOTIFY serverChatChanged)
|
||||||
Q_PROPERTY(QString modelPath READ modelPath WRITE setModelPath NOTIFY modelPathChanged)
|
Q_PROPERTY(QString modelPath READ modelPath WRITE setModelPath NOTIFY modelPathChanged)
|
||||||
@ -156,9 +156,10 @@ public:
|
|||||||
QString modelSuggestedFollowUpPrompt(const ModelInfo &info) const;
|
QString modelSuggestedFollowUpPrompt(const ModelInfo &info) const;
|
||||||
Q_INVOKABLE void setModelSuggestedFollowUpPrompt(const ModelInfo &info, const QString &value, bool force = false);
|
Q_INVOKABLE void setModelSuggestedFollowUpPrompt(const ModelInfo &info, const QString &value, bool force = false);
|
||||||
|
|
||||||
|
// TODO: implement
|
||||||
|
auto modelGenParams(const ModelInfo &info) -> gpt4all::ui::GenerationParams;
|
||||||
|
|
||||||
// Application settings
|
// Application settings
|
||||||
int threadCount() const;
|
|
||||||
void setThreadCount(int value);
|
|
||||||
bool systemTray() const;
|
bool systemTray() const;
|
||||||
void setSystemTray(bool value);
|
void setSystemTray(bool value);
|
||||||
bool serverChat() const;
|
bool serverChat() const;
|
||||||
@ -173,10 +174,6 @@ public:
|
|||||||
void setFontSize(FontSize value);
|
void setFontSize(FontSize value);
|
||||||
QString device();
|
QString device();
|
||||||
void setDevice(const QString &value);
|
void setDevice(const QString &value);
|
||||||
int32_t contextLength() const;
|
|
||||||
void setContextLength(int32_t value);
|
|
||||||
int32_t gpuLayers() const;
|
|
||||||
void setGpuLayers(int32_t value);
|
|
||||||
SuggestionMode suggestionMode() const;
|
SuggestionMode suggestionMode() const;
|
||||||
void setSuggestionMode(SuggestionMode value);
|
void setSuggestionMode(SuggestionMode value);
|
||||||
|
|
||||||
@ -231,7 +228,6 @@ Q_SIGNALS:
|
|||||||
void systemMessageChanged(const ModelInfo &info, bool fromInfo = false);
|
void systemMessageChanged(const ModelInfo &info, bool fromInfo = false);
|
||||||
void chatNamePromptChanged(const ModelInfo &info);
|
void chatNamePromptChanged(const ModelInfo &info);
|
||||||
void suggestedFollowUpPromptChanged(const ModelInfo &info);
|
void suggestedFollowUpPromptChanged(const ModelInfo &info);
|
||||||
void threadCountChanged();
|
|
||||||
void systemTrayChanged();
|
void systemTrayChanged();
|
||||||
void serverChatChanged();
|
void serverChatChanged();
|
||||||
void modelPathChanged();
|
void modelPathChanged();
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include "chat.h"
|
#include "chat.h"
|
||||||
#include "chatmodel.h"
|
#include "chatmodel.h"
|
||||||
|
#include "llmodel_description.h"
|
||||||
#include "modellist.h"
|
#include "modellist.h"
|
||||||
#include "mysettings.h"
|
#include "mysettings.h"
|
||||||
|
|
||||||
@ -50,8 +51,10 @@
|
|||||||
#include <variant>
|
#include <variant>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
using namespace std::string_literals;
|
|
||||||
using namespace Qt::Literals::StringLiterals;
|
using namespace Qt::Literals::StringLiterals;
|
||||||
|
using namespace std::string_literals;
|
||||||
|
using namespace gpt4all;
|
||||||
|
using namespace gpt4all::ui;
|
||||||
|
|
||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
|
|
||||||
@ -127,11 +130,11 @@ class BaseCompletionRequest {
|
|||||||
public:
|
public:
|
||||||
QString model; // required
|
QString model; // required
|
||||||
// NB: some parameters are not supported yet
|
// NB: some parameters are not supported yet
|
||||||
uint max_tokens = 16;
|
|
||||||
qint64 n = 1;
|
qint64 n = 1;
|
||||||
float temperature = 1.f;
|
std::optional<uint> max_tokens {};
|
||||||
float top_p = 1.f;
|
std::optional<float> temperature {};
|
||||||
float min_p = 0.f;
|
std::optional<float> top_p {};
|
||||||
|
std::optional<float> min_p {};
|
||||||
|
|
||||||
BaseCompletionRequest() = default;
|
BaseCompletionRequest() = default;
|
||||||
virtual ~BaseCompletionRequest() = default;
|
virtual ~BaseCompletionRequest() = default;
|
||||||
@ -162,7 +165,7 @@ protected:
|
|||||||
|
|
||||||
value = reqValue("max_tokens", Integer, false, /*min*/ 1);
|
value = reqValue("max_tokens", Integer, false, /*min*/ 1);
|
||||||
if (!value.isNull())
|
if (!value.isNull())
|
||||||
this->max_tokens = uint(qMin(value.toInteger(), UINT32_MAX));
|
this->max_tokens = uint(qMin(value.toInteger(), qint64(UINT32_MAX)));
|
||||||
|
|
||||||
value = reqValue("n", Integer, false, /*min*/ 1);
|
value = reqValue("n", Integer, false, /*min*/ 1);
|
||||||
if (!value.isNull())
|
if (!value.isNull())
|
||||||
@ -629,8 +632,6 @@ auto Server::handleCompletionRequest(const CompletionRequest &request)
|
|||||||
{
|
{
|
||||||
Q_ASSERT(m_chatModel);
|
Q_ASSERT(m_chatModel);
|
||||||
|
|
||||||
auto *mySettings = MySettings::globalInstance();
|
|
||||||
|
|
||||||
ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
|
ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
|
||||||
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
||||||
for (const ModelInfo &info : modelList) {
|
for (const ModelInfo &info : modelList) {
|
||||||
@ -662,22 +663,25 @@ auto Server::handleCompletionRequest(const CompletionRequest &request)
|
|||||||
return makeError(QHttpServerResponder::StatusCode::InternalServerError);
|
return makeError(QHttpServerResponder::StatusCode::InternalServerError);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<GenerationParams> genParams;
|
||||||
|
{
|
||||||
|
using enum GenerationParam;
|
||||||
|
QMap<GenerationParam, QVariant> values;
|
||||||
|
if (auto v = request.max_tokens ) values.insert(NPredict, *v);
|
||||||
|
if (auto v = request.temperature) values.insert(Temperature, *v);
|
||||||
|
if (auto v = request.top_p ) values.insert(TopP, *v);
|
||||||
|
if (auto v = request.min_p ) values.insert(MinP, *v);
|
||||||
|
try {
|
||||||
|
genParams.reset(modelDescription()->makeGenerationParams(values));
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
throw InvalidRequestError(e.what());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// add prompt/response items to GUI
|
// add prompt/response items to GUI
|
||||||
m_chatModel->appendPrompt(request.prompt);
|
m_chatModel->appendPrompt(request.prompt);
|
||||||
m_chatModel->appendResponse();
|
m_chatModel->appendResponse();
|
||||||
|
|
||||||
// FIXME(jared): taking parameters from the UI inhibits reproducibility of results
|
|
||||||
backend::GenerationParams genParams {
|
|
||||||
.n_predict = request.max_tokens,
|
|
||||||
.top_k = mySettings->modelTopK(modelInfo),
|
|
||||||
.top_p = request.top_p,
|
|
||||||
.min_p = request.min_p,
|
|
||||||
.temp = request.temperature,
|
|
||||||
.n_batch = mySettings->modelPromptBatchSize(modelInfo),
|
|
||||||
.repeat_penalty = float(mySettings->modelRepeatPenalty(modelInfo)),
|
|
||||||
.repeat_last_n = mySettings->modelRepeatPenaltyTokens(modelInfo),
|
|
||||||
};
|
|
||||||
|
|
||||||
auto promptUtf8 = request.prompt.toUtf8();
|
auto promptUtf8 = request.prompt.toUtf8();
|
||||||
int promptTokens = 0;
|
int promptTokens = 0;
|
||||||
int responseTokens = 0;
|
int responseTokens = 0;
|
||||||
@ -686,7 +690,7 @@ auto Server::handleCompletionRequest(const CompletionRequest &request)
|
|||||||
PromptResult result;
|
PromptResult result;
|
||||||
try {
|
try {
|
||||||
result = promptInternal(std::string_view(promptUtf8.cbegin(), promptUtf8.cend()),
|
result = promptInternal(std::string_view(promptUtf8.cbegin(), promptUtf8.cend()),
|
||||||
genParams,
|
*genParams,
|
||||||
/*usedLocalDocs*/ false);
|
/*usedLocalDocs*/ false);
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
m_chatModel->setResponseValue(e.what());
|
m_chatModel->setResponseValue(e.what());
|
||||||
@ -733,8 +737,6 @@ auto Server::handleCompletionRequest(const CompletionRequest &request)
|
|||||||
auto Server::handleChatRequest(const ChatRequest &request)
|
auto Server::handleChatRequest(const ChatRequest &request)
|
||||||
-> std::pair<QHttpServerResponse, std::optional<QJsonObject>>
|
-> std::pair<QHttpServerResponse, std::optional<QJsonObject>>
|
||||||
{
|
{
|
||||||
auto *mySettings = MySettings::globalInstance();
|
|
||||||
|
|
||||||
ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
|
ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
|
||||||
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
||||||
for (const ModelInfo &info : modelList) {
|
for (const ModelInfo &info : modelList) {
|
||||||
@ -779,17 +781,20 @@ auto Server::handleChatRequest(const ChatRequest &request)
|
|||||||
}
|
}
|
||||||
auto startOffset = m_chatModel->appendResponseWithHistory(messages);
|
auto startOffset = m_chatModel->appendResponseWithHistory(messages);
|
||||||
|
|
||||||
// FIXME(jared): taking parameters from the UI inhibits reproducibility of results
|
std::unique_ptr<GenerationParams> genParams;
|
||||||
backend::GenerationParams genParams {
|
{
|
||||||
.n_predict = request.max_tokens,
|
using enum GenerationParam;
|
||||||
.top_k = mySettings->modelTopK(modelInfo),
|
QMap<GenerationParam, QVariant> values;
|
||||||
.top_p = request.top_p,
|
if (auto v = request.max_tokens ) values.insert(NPredict, *v);
|
||||||
.min_p = request.min_p,
|
if (auto v = request.temperature) values.insert(Temperature, *v);
|
||||||
.temp = request.temperature,
|
if (auto v = request.top_p ) values.insert(TopP, *v);
|
||||||
.n_batch = mySettings->modelPromptBatchSize(modelInfo),
|
if (auto v = request.min_p ) values.insert(MinP, *v);
|
||||||
.repeat_penalty = float(mySettings->modelRepeatPenalty(modelInfo)),
|
try {
|
||||||
.repeat_last_n = mySettings->modelRepeatPenaltyTokens(modelInfo),
|
genParams.reset(modelDescription()->makeGenerationParams(values));
|
||||||
};
|
} catch (const std::exception &e) {
|
||||||
|
throw InvalidRequestError(e.what());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int promptTokens = 0;
|
int promptTokens = 0;
|
||||||
int responseTokens = 0;
|
int responseTokens = 0;
|
||||||
@ -797,7 +802,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
|
|||||||
for (int i = 0; i < request.n; ++i) {
|
for (int i = 0; i < request.n; ++i) {
|
||||||
ChatPromptResult result;
|
ChatPromptResult result;
|
||||||
try {
|
try {
|
||||||
result = promptInternalChat(m_collections, genParams, startOffset);
|
result = promptInternalChat(m_collections, *genParams, startOffset);
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
m_chatModel->setResponseValue(e.what());
|
m_chatModel->setResponseValue(e.what());
|
||||||
m_chatModel->setError();
|
m_chatModel->setError();
|
||||||
|
164
gpt4all-chat/src/store_base.cpp
Normal file
164
gpt4all-chat/src/store_base.cpp
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
#include "store_base.h"
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
|
#include <gpt4all-backend/formatters.h> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <QByteArray>
|
||||||
|
#include <QDebug>
|
||||||
|
#include <QIODevice>
|
||||||
|
#include <QLatin1StringView> // IWYU pragma: keep
|
||||||
|
#include <QSaveFile>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QtAssert>
|
||||||
|
#include <QtLogging>
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <string>
|
||||||
|
#include <system_error>
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
namespace json = boost::json;
|
||||||
|
namespace sys = boost::system;
|
||||||
|
using namespace Qt::StringLiterals;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
DataStoreError::DataStoreError(const QFileDevice *file)
|
||||||
|
: m_error(file->error())
|
||||||
|
, m_errorString(file->errorString())
|
||||||
|
{
|
||||||
|
Q_ASSERT(file->error());
|
||||||
|
}
|
||||||
|
|
||||||
|
DataStoreError::DataStoreError(const boost::system::system_error &e)
|
||||||
|
: m_error(e.code())
|
||||||
|
, m_errorString(QString::fromUtf8(e.what()))
|
||||||
|
{
|
||||||
|
Q_ASSERT(e.code());
|
||||||
|
}
|
||||||
|
|
||||||
|
DataStoreError::DataStoreError(QString e)
|
||||||
|
: m_error()
|
||||||
|
, m_errorString(e)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void DataStoreError::raise() const
|
||||||
|
{
|
||||||
|
std::visit(Overloaded {
|
||||||
|
[&](QFileDevice::FileError e) { throw FileError(m_errorString, e); },
|
||||||
|
[&](boost::system::error_code e) { throw std::runtime_error(m_errorString.toUtf8().constData()); },
|
||||||
|
[&](std::monostate ) { throw std::runtime_error(m_errorString.toUtf8().constData()); },
|
||||||
|
}, m_error);
|
||||||
|
Q_UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Drop the in-memory cache and repopulate it from every JSON file in m_path.
/// Unreadable, unparsable, and duplicate entries are skipped with a warning.
auto DataStoreBase::reload() -> DataStoreResult<>
{
    if (auto res = clear(); !res)
        return res;

    json::stream_parser parser;
    QFile file;

    for (auto &entry : fs::directory_iterator(m_path)) {
        file.setFileName(entry.path());
        if (!file.open(QFile::ReadOnly)) {
            qWarning().noquote() << "skipping unopenable file:" << file.fileName();
            continue;
        }
        auto jv = read(file, parser);
        if (!jv) {
            // FIX: the message was missing the space before "because".
            (qWarning().nospace() << "skipping " << file.fileName() << " because of read error: ").noquote()
                << jv.error().errorString();
            // FIX: a failed read can leave a partial document in the parser;
            // discard it so the next file starts from a clean state.
            parser.reset();
        } else if (auto [unique, uuid] = insert(*jv); !unique)
            qWarning() << "skipping duplicate data store entry:" << uuid;
        file.close();
    }
    return {};
}
|
||||||
|
|
||||||
|
/// Point the store at a new directory, reloading its contents if the
/// directory actually changed.
auto DataStoreBase::setPath(fs::path path) -> DataStoreResult<>
{
    if (path == m_path)
        return {}; // no change -> nothing to do
    m_path = std::move(path);
    return reload();
}
|
||||||
|
|
||||||
|
auto DataStoreBase::getFilePath(const QString &name) -> std::filesystem::path
|
||||||
|
{ return m_path / fmt::format("{}.json", QLatin1StringView(normalizeName(name))); }
|
||||||
|
|
||||||
|
/// Create and open a brand-new JSON file for `name`.
/// Fails with file_exists if an entry by that name is already on disk.
auto DataStoreBase::openNew(const QString &name) -> DataStoreResult<std::unique_ptr<QFile>>
{
    auto path = getFilePath(name);
    auto file = std::make_unique<QFile>(path);
    // Pre-check gives a friendlier error; QFile::NewOnly below still guards
    // against a race with another creator.
    if (file->exists())
        return std::unexpected(sys::system_error(std::make_error_code(std::errc::file_exists), path.string()));
    if (!file->open(QFile::WriteOnly | QFile::NewOnly))
        return std::unexpected(file.get());
    return file;
}
|
||||||
|
|
||||||
|
/// Open an existing entry for atomic rewrite via QSaveFile.
/// Fails with no_such_file_or_directory if the entry is not on disk.
auto DataStoreBase::openExisting(const QString &name) -> DataStoreResult<std::unique_ptr<QSaveFile>>
{
    auto path = getFilePath(name);
    if (!QFile::exists(path))
        return std::unexpected(sys::system_error(
            std::make_error_code(std::errc::no_such_file_or_directory), path.string()
        ));
    auto file = std::make_unique<QSaveFile>(toQString(path));
    if (!file->open(QSaveFile::WriteOnly | QSaveFile::ExistingOnly))
        return std::unexpected(file.get());
    return file;
}
|
||||||
|
|
||||||
|
/// Read the whole device in JSON_BUFSIZ chunks and parse it as one JSON value.
/// Returns a DataStoreError on I/O failure or malformed JSON.
auto DataStoreBase::read(QFileDevice &file, boost::json::stream_parser &parser) -> DataStoreResult<boost::json::value>
{
    try {
        for (;;) {
            auto chunk = file.read(JSON_BUFSIZ);
            if (file.error())
                return std::unexpected(&file);
            if (chunk.isEmpty()) {
                Q_ASSERT(file.atEnd());
                break;
            }
            parser.write(chunk.data(), chunk.size());
        }
        return parser.release();
    } catch (const sys::system_error &e) {
        // FIX: stream_parser::write()/release() throw on malformed or
        // incomplete JSON; surface that through DataStoreResult instead of
        // letting the exception escape past a function that advertises
        // error-code semantics.
        return std::unexpected(e);
    }
}
|
||||||
|
|
||||||
|
/// Serialize `value` to `file` in fixed-size chunks, then flush.
auto DataStoreBase::write(const json::value &value, QFileDevice &file) -> DataStoreResult<>
{
    m_serializer.reset(&value);
    std::array<char, JSON_BUFSIZ> buf;

    while (!m_serializer.done()) {
        auto chunk = m_serializer.read(buf.data(), buf.size());
        qint64 nWritten = file.write(chunk.data(), chunk.size());
        if (nWritten < 0)
            return std::unexpected(&file);
        Q_ASSERT(nWritten == chunk.size()); // QFileDevice writes all or errors
    }

    if (!file.flush())
        return std::unexpected(&file);

    return {};
}
|
||||||
|
|
||||||
|
/// Turn a display name into a filesystem-safe byte string: lower-case it,
/// percent-encode everything unsafe, and escape the reserved "." / "..".
QByteArray DataStoreBase::normalizeName(const QString &name)
{
    auto encoded = QUrl::toPercentEncoding(
        name.toLower(), /*exclude*/ " !#$%&'()+,;=@[]^`{}"_ba, /*include*/ "~"_ba);

    // "." and ".." are special filenames on every platform
    if (encoded == "."_ba)
        return "%2E"_ba;
    if (encoded == ".."_ba)
        return "%2E%2E"_ba;
    return encoded;
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
119
gpt4all-chat/src/store_base.h
Normal file
119
gpt4all-chat/src/store_base.h
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "utils.h" // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <boost/json.hpp> // IWYU pragma: keep
|
||||||
|
#include <boost/system.hpp> // IWYU pragma: keep
|
||||||
|
#include <tl/generator.hpp>
|
||||||
|
|
||||||
|
#include <QFile>
|
||||||
|
#include <QFileDevice>
|
||||||
|
#include <QString>
|
||||||
|
#include <QUuid>
|
||||||
|
#include <QtTypes> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <expected>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <utility>
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
|
class QByteArray;
|
||||||
|
class QSaveFile;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class DataStoreError {
|
||||||
|
public:
|
||||||
|
using ErrorCode = std::variant<
|
||||||
|
QFileDevice::FileError,
|
||||||
|
boost::system::error_code,
|
||||||
|
std::monostate
|
||||||
|
>;
|
||||||
|
|
||||||
|
DataStoreError(const QFileDevice *file);
|
||||||
|
DataStoreError(const boost::system::system_error &e);
|
||||||
|
DataStoreError(QString e);
|
||||||
|
|
||||||
|
[[nodiscard]] const ErrorCode &error () const { return m_error; }
|
||||||
|
[[nodiscard]] const QString &errorString() const { return m_errorString; }
|
||||||
|
|
||||||
|
[[noreturn]] void raise() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
ErrorCode m_error;
|
||||||
|
QString m_errorString;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T = void>
using DataStoreResult = std::expected<T, DataStoreError>;

/// Non-template base of DataStore<T>: owns the on-disk directory and the
/// JSON (de)serialization plumbing. Subclasses supply the in-memory cache
/// via clear()/insert().
class DataStoreBase {
protected:
    explicit DataStoreBase(std::filesystem::path path)
        : m_path(std::move(path))
    {}

public:
    auto path() const -> const std::filesystem::path & { return m_path; }
    auto setPath(std::filesystem::path path) -> DataStoreResult<>;

protected:
    auto reload() -> DataStoreResult<>;
    virtual auto clear() -> DataStoreResult<> = 0;
    struct InsertResult { bool unique; QUuid uuid; };
    virtual InsertResult insert(const boost::json::value &jv) = 0;

    // helpers
    auto getFilePath(const QString &name) -> std::filesystem::path;
    auto openNew(const QString &name) -> DataStoreResult<std::unique_ptr<QFile>>;
    auto openExisting(const QString &name) -> DataStoreResult<std::unique_ptr<QSaveFile>>;
    static auto read(QFileDevice &file, boost::json::stream_parser &parser) -> DataStoreResult<boost::json::value>;
    auto write(const boost::json::value &value, QFileDevice &file) -> DataStoreResult<>;

private:
    static constexpr uint JSON_BUFSIZ = 16384; // default QFILE_WRITEBUFFER_SIZE

    static QByteArray normalizeName(const QString &name);

protected:
    std::filesystem::path m_path;

private:
    boost::json::serializer m_serializer;
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class DataStore : public DataStoreBase {
|
||||||
|
public:
|
||||||
|
explicit DataStore(std::filesystem::path path);
|
||||||
|
|
||||||
|
auto list() -> tl::generator<const T &>;
|
||||||
|
auto setData(T data) -> DataStoreResult<>;
|
||||||
|
auto remove(const QUuid &id) -> DataStoreResult<>;
|
||||||
|
|
||||||
|
auto acquire(QUuid id) -> DataStoreResult<const T *>;
|
||||||
|
auto release(const QUuid &id) -> DataStoreResult<>;
|
||||||
|
|
||||||
|
[[nodiscard]]
|
||||||
|
auto operator[](const QUuid &id) const -> const T &
|
||||||
|
{ return m_entries.at(id); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
auto createImpl(T data, const QString &name) -> DataStoreResult<const T *>;
|
||||||
|
auto clear() -> DataStoreResult<> final;
|
||||||
|
InsertResult insert(const boost::json::value &jv) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unordered_map<QUuid, T> m_entries;
|
||||||
|
std::unordered_set<QUuid> m_acquired;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
||||||
|
|
||||||
|
#include "store_base.inl" // IWYU pragma: export
|
128
gpt4all-chat/src/store_base.inl
Normal file
128
gpt4all-chat/src/store_base.inl
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
#include "json-helpers.h" // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <boost/json.hpp> // IWYU pragma: keep
|
||||||
|
#include <gpt4all-backend/json-helpers.h> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <QSaveFile>
|
||||||
|
#include <QtAssert>
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DataStore<T>::DataStore(std::filesystem::path path)
|
||||||
|
: DataStoreBase(std::move(path))
|
||||||
|
{
|
||||||
|
if (auto res = reload(); !res)
|
||||||
|
res.error().raise(); // should be impossible
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
auto DataStore<T>::list() -> tl::generator<const T &>
|
||||||
|
{
|
||||||
|
for (auto &[_, value] : m_entries)
|
||||||
|
co_yield value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Persist a brand-new entry to disk, cache it, and acquire it for the
/// caller. Returns a pointer to the cached copy.
template <typename T>
auto DataStore<T>::createImpl(T data, const QString &name) -> DataStoreResult<const T *>
{
    // acquire path
    auto file = openNew(name);
    if (!file)
        return std::unexpected(file.error());

    // serialize
    if (auto res = write(boost::json::value_from(data), **file); !res)
        return std::unexpected(res.error());

    // FIX: copy the id out *before* moving `data` into the map. The original
    // passed data.id and std::move(data) to the same emplace() call
    // (unspecified argument evaluation order) and then read data.id again
    // after the move.
    auto id = data.id;

    // insert
    auto [it, unique] = m_entries.emplace(id, std::move(data));
    Q_ASSERT(unique);

    // acquire data ownership
    if (auto res = acquire(id); !res)
        return std::unexpected(res.error());

    return &it->second;
}
|
||||||
|
|
||||||
|
/// Atomically rewrite an existing entry's file, then update the cache.
template <typename T>
auto DataStore<T>::setData(T data) -> DataStoreResult<>
{
    // FIX: verify the id up front. The original committed the file to disk
    // and only then called m_entries.at(), which throws std::out_of_range
    // for an unknown id — escaping the DataStoreResult contract *after*
    // already mutating the on-disk state.
    auto it = m_entries.find(data.id);
    if (it == m_entries.end())
        return std::unexpected(QStringLiteral("id not found: %1").arg(data.id.toString()));

    // acquire path
    auto file = openExisting(data.name);
    if (!file)
        return std::unexpected(file.error());

    // serialize
    if (auto res = write(boost::json::value_from(data), **file); !res)
        return std::unexpected(res.error());
    if (!(*file)->commit())
        return std::unexpected(file->get());

    // update
    it->second = std::move(data);
    return {};
}
|
||||||
|
|
||||||
|
/// Delete an entry's file from disk and drop it from the cache.
template <typename T>
auto DataStore<T>::remove(const QUuid &id) -> DataStoreResult<>
{
    // acquire UUID
    auto it = m_entries.find(id);
    if (it == m_entries.end())
        return std::unexpected(QStringLiteral("id not found: %1").arg(id.toString()));

    auto &[_, data] = *it;

    // remove the path
    auto path = getFilePath(data.name);
    QFile file(path);
    if (!file.remove())
        // FIX: was `throw std::unexpected(&file)`, which threw the wrapper
        // object itself instead of returning it through DataStoreResult.
        return std::unexpected(&file);

    // update cache
    m_entries.erase(it);
    return {};
}
|
||||||
|
|
||||||
|
/// Mark `id` as exclusively acquired and return a pointer to its entry.
/// Fails if the id is already acquired.
template <typename T>
auto DataStore<T>::acquire(QUuid id) -> DataStoreResult<const T *>
{
    // FIX: the original did insert(std::move(id)) and then read `id` on the
    // failure path — a use-after-move. QUuid is trivially cheap to copy, so
    // simply insert a copy and keep `id` usable.
    auto [it, unique] = m_acquired.insert(id);
    if (!unique)
        return std::unexpected(QStringLiteral("id already acquired: %1").arg(id.toString()));
    return &(*this)[*it];
}
|
||||||
|
|
||||||
|
/// Release a previously acquired id. Fails if it was not acquired.
template <typename T>
auto DataStore<T>::release(const QUuid &id) -> DataStoreResult<>
{
    auto nErased = m_acquired.erase(id);
    if (nErased == 0)
        return std::unexpected(QStringLiteral("id not acquired: %1").arg(id.toString()));
    return {};
}
|
||||||
|
|
||||||
|
/// Empty the in-memory cache. Refuses while any entry is still acquired,
/// since callers may hold pointers into m_entries.
template <typename T>
auto DataStore<T>::clear() -> DataStoreResult<>
{
    if (!m_acquired.empty())
        return std::unexpected(QStringLiteral("cannot clear data store with living references"));
    m_entries.clear();
    return {};
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
auto DataStore<T>::insert(const boost::json::value &jv) -> InsertResult
|
||||||
|
{
|
||||||
|
auto data = boost::json::value_to<T>(jv);
|
||||||
|
auto id = data.id;
|
||||||
|
auto [_, ok] = m_entries.emplace(id, std::move(data));
|
||||||
|
return { ok, std::move(id) };
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
25
gpt4all-chat/src/store_provider.cpp
Normal file
25
gpt4all-chat/src/store_provider.cpp
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#include "store_provider.h"
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
auto ProviderStore::create(QString name, QUrl base_url, QString api_key)
|
||||||
|
-> DataStoreResult<const ModelProviderData *>
|
||||||
|
{
|
||||||
|
ModelProviderData data { QUuid::createUuid(), ProviderType::openai, name, std::move(base_url),
|
||||||
|
OpenaiProviderDetails { std::move(api_key) } };
|
||||||
|
return createImpl(std::move(data), name);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ProviderStore::create(QString name, QUrl base_url)
|
||||||
|
-> DataStoreResult<const ModelProviderData *>
|
||||||
|
{
|
||||||
|
ModelProviderData data { QUuid::createUuid(), ProviderType::ollama, name, std::move(base_url) };
|
||||||
|
return createImpl(std::move(data), name);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
49
gpt4all-chat/src/store_provider.h
Normal file
49
gpt4all-chat/src/store_provider.h
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "store_base.h"
|
||||||
|
|
||||||
|
#include <boost/describe/class.hpp>
|
||||||
|
#include <boost/describe/enum.hpp>
|
||||||
|
|
||||||
|
#include <QString>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QUuid>
|
||||||
|
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
BOOST_DEFINE_ENUM_CLASS(ProviderType, openai, ollama)

/// Provider-specific fields for OpenAI-compatible endpoints.
struct OpenaiProviderDetails {
    QString api_key;
};
BOOST_DESCRIBE_STRUCT(OpenaiProviderDetails, (), (api_key))

/// One configured model provider, as persisted to disk by ProviderStore.
struct ModelProviderData {
    using Details = std::variant<std::monostate, OpenaiProviderDetails>;
    QUuid        id;
    ProviderType type;
    QString      name;
    QUrl         base_url;
    Details      details;   // std::monostate for providers with no extras (ollama)
};
BOOST_DESCRIBE_STRUCT(ModelProviderData, (), (id, type, name, base_url, details))

/// Directory-backed store of model provider configurations.
class ProviderStore : public DataStore<ModelProviderData> {
private:
    using Super = DataStore<ModelProviderData>;

public:
    using Super::Super;

    /// OpenAI
    auto create(QString name, QUrl base_url, QString api_key) -> DataStoreResult<const ModelProviderData *>;
    /// Ollama
    auto create(QString name, QUrl base_url) -> DataStoreResult<const ModelProviderData *>;
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
@ -1,16 +1,56 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <QFileDevice>
|
||||||
|
#include <QHash>
|
||||||
#include <QJsonValue>
|
#include <QJsonValue>
|
||||||
|
#include <QBitArray> // for qHash overload // IWYU pragma: keep
|
||||||
#include <QLatin1StringView> // IWYU pragma: keep
|
#include <QLatin1StringView> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <concepts>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <functional>
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
|
#include <stdexcept>
|
||||||
#include <utility> // IWYU pragma: keep
|
#include <utility> // IWYU pragma: keep
|
||||||
|
|
||||||
// IWYU pragma: no_forward_declare QJsonValue
|
// IWYU pragma: no_forward_declare QJsonValue
|
||||||
class QJsonObject;
|
class QJsonObject;
|
||||||
|
class QVariant;
|
||||||
|
template <typename Key, typename T> class QMap;
|
||||||
|
|
||||||
|
|
||||||
// alternative to QJsonObject's initializer_list constructor that accepts Latin-1 strings
|
// alternative to QJsonObject's initializer_list constructor that accepts Latin-1 strings
|
||||||
QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringView, QJsonValue>> args);
|
QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringView, QJsonValue>> args);
|
||||||
|
|
||||||
|
QJsonObject &extend(QJsonObject &obj, const QMap<QLatin1StringView, QVariant> &values);
|
||||||
|
|
||||||
|
QString toQString(const std::filesystem::path &path);
|
||||||
|
auto toFSPath (const QString &str) -> std::filesystem::path;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
concept QHashable = requires(const T &x) {
|
||||||
|
{ qHash(x) } -> std::same_as<size_t>;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <QHashable T>
|
||||||
|
struct std::hash<T> {
|
||||||
|
size_t operator()(const T &value) const noexcept
|
||||||
|
{ return qHash(value); }
|
||||||
|
};
|
||||||
|
|
||||||
|
class FileError : public std::runtime_error {
|
||||||
|
public:
|
||||||
|
explicit FileError(const QFileDevice *file)
|
||||||
|
: FileError(file->errorString(), file->error()) {}
|
||||||
|
explicit FileError(const QString &str, QFileDevice::FileError code);
|
||||||
|
QFileDevice::FileError code() const noexcept { return m_code; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
QFileDevice::FileError m_code;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename... Ts>
|
||||||
|
struct Overloaded : Ts... { using Ts::operator()...; };
|
||||||
|
|
||||||
|
|
||||||
#include "utils.inl" // IWYU pragma: export
|
#include "utils.inl" // IWYU pragma: export
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
#include <QJsonObject>
|
#include <QJsonObject>
|
||||||
|
#include <QMap>
|
||||||
|
#include <QVariant>
|
||||||
|
#include <QtAssert>
|
||||||
|
|
||||||
|
|
||||||
inline QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringView, QJsonValue>> args)
|
inline QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringView, QJsonValue>> args)
|
||||||
@ -8,3 +11,34 @@ inline QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringV
|
|||||||
obj.insert(arg.first, arg.second);
|
obj.insert(arg.first, arg.second);
|
||||||
return obj;
|
return obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Insert every key/value pair from `values` into `obj` (converting each
/// QVariant to a QJsonValue) and return `obj` for chaining.
inline QJsonObject &extend(QJsonObject &obj, const QMap<QLatin1StringView, QVariant> &values)
{
    for (const auto &[key, value] : values.asKeyValueRange())
        obj.insert(key, QJsonValue::fromVariant(value));
    return obj;
}
|
||||||
|
|
||||||
|
// copied from qfile.h
|
||||||
|
inline QString toQString(const std::filesystem::path &path)
|
||||||
|
{
|
||||||
|
#ifdef Q_OS_WIN
|
||||||
|
return QString::fromStdWString(path.native());
|
||||||
|
#else
|
||||||
|
return QString::fromStdString(path.native());
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// copied from qfile.h
|
||||||
|
inline auto toFSPath(const QString &str) -> std::filesystem::path
|
||||||
|
{
|
||||||
|
return { reinterpret_cast<const char16_t *>(str.cbegin()),
|
||||||
|
reinterpret_cast<const char16_t *>(str.cend ()) };
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a FileError from a message plus a nonzero QFileDevice error code.
// FIX: must be `inline` — this definition lives in a header-included .inl
// file, so a non-inline out-of-class definition violates the ODR as soon as
// two translation units include utils.h.
inline FileError::FileError(const QString &str, QFileDevice::FileError code)
    : std::runtime_error(str.toUtf8().constData())
    , m_code(code)
{
    Q_ASSERT(code);
}
|
||||||
|
Loading…
Reference in New Issue
Block a user