mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-07-06 20:09:58 +00:00
WIP (clang is crashing)
This commit is contained in:
parent
1dc9f22d5b
commit
7745f208bc
@ -1,3 +1,3 @@
|
|||||||
[codespell]
|
[codespell]
|
||||||
ignore-words-list = blong, afterall, assistent, crasher, requestor
|
ignore-words-list = blong, afterall, assistent, crasher, requestor, nam
|
||||||
skip = ./.git,./gpt4all-chat/translations,*.pdf,*.svg,*.lock
|
skip = ./.git,./gpt4all-chat/translations,*.pdf,*.svg,*.lock
|
||||||
|
6
.gitmodules
vendored
6
.gitmodules
vendored
@ -26,3 +26,9 @@
|
|||||||
[submodule "gpt4all-backend/deps/qcoro"]
|
[submodule "gpt4all-backend/deps/qcoro"]
|
||||||
path = deps/qcoro
|
path = deps/qcoro
|
||||||
url = https://github.com/nomic-ai/qcoro.git
|
url = https://github.com/nomic-ai/qcoro.git
|
||||||
|
[submodule "gpt4all-backend/deps/date"]
|
||||||
|
path = gpt4all-backend/deps/date
|
||||||
|
url = https://github.com/HowardHinnant/date.git
|
||||||
|
[submodule "gpt4all-chat/deps/generator"]
|
||||||
|
path = gpt4all-chat/deps/generator
|
||||||
|
url = https://github.com/TartanLlama/generator.git
|
||||||
|
@ -49,7 +49,6 @@ You can **clone** an existing model, which allows you to save a configuration of
|
|||||||
|----------------------------|------------------------------------------|-----------|
|
|----------------------------|------------------------------------------|-----------|
|
||||||
| **Context Length** | Maximum length of input sequence in tokens | 2048 |
|
| **Context Length** | Maximum length of input sequence in tokens | 2048 |
|
||||||
| **Max Length** | Maximum length of response in tokens | 4096 |
|
| **Max Length** | Maximum length of response in tokens | 4096 |
|
||||||
| **Prompt Batch Size** | Token batch size for parallel processing | 128 |
|
|
||||||
| **Temperature** | Lower temperature gives more likely generations | 0.7 |
|
| **Temperature** | Lower temperature gives more likely generations | 0.7 |
|
||||||
| **Top P** | Prevents choosing highly unlikely tokens | 0.4 |
|
| **Top P** | Prevents choosing highly unlikely tokens | 0.4 |
|
||||||
| **Top K** | Size of selection pool for tokens | 40 |
|
| **Top K** | Size of selection pool for tokens | 40 |
|
||||||
|
@ -12,3 +12,5 @@ FetchContent_Declare(
|
|||||||
URL_HASH "SHA256=7da75f171837577a52bbf217e17f8ea576c7c246e4594d617bfde7fafd408be5"
|
URL_HASH "SHA256=7da75f171837577a52bbf217e17f8ea576c7c246e4594d617bfde7fafd408be5"
|
||||||
)
|
)
|
||||||
FetchContent_MakeAvailable(boost)
|
FetchContent_MakeAvailable(boost)
|
||||||
|
|
||||||
|
add_subdirectory(date)
|
||||||
|
1
gpt4all-backend/deps/date
Submodule
1
gpt4all-backend/deps/date
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 5bdb7e6f31fac909c090a46dbd9fea27b6e609a4
|
@ -26,7 +26,8 @@
|
|||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
MAKE_FORMATTER(QUtf8StringView, value );
|
MAKE_FORMATTER(QLatin1StringView, value );
|
||||||
MAKE_FORMATTER(QStringView, value.toUtf8() );
|
MAKE_FORMATTER(QString, value.toUtf8() );
|
||||||
MAKE_FORMATTER(QString, value.toUtf8() );
|
MAKE_FORMATTER(QStringView, value.toUtf8() );
|
||||||
MAKE_FORMATTER(QVariant, value.toString().toUtf8());
|
MAKE_FORMATTER(QUtf8StringView, value );
|
||||||
|
MAKE_FORMATTER(QVariant, value.toString().toUtf8());
|
||||||
|
@ -11,7 +11,6 @@
|
|||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QUrl>
|
#include <QUrl>
|
||||||
|
|
||||||
#include <cassert>
|
|
||||||
#include <expected>
|
#include <expected>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <variant>
|
#include <variant>
|
||||||
@ -26,26 +25,21 @@ namespace gpt4all::backend {
|
|||||||
struct ResponseError {
|
struct ResponseError {
|
||||||
public:
|
public:
|
||||||
struct BadStatus { int code; };
|
struct BadStatus { int code; };
|
||||||
|
|
||||||
private:
|
|
||||||
using ErrorCode = std::variant<
|
using ErrorCode = std::variant<
|
||||||
QNetworkReply::NetworkError,
|
QNetworkReply::NetworkError,
|
||||||
boost::system::error_code,
|
boost::system::error_code,
|
||||||
BadStatus
|
BadStatus
|
||||||
>;
|
>;
|
||||||
|
|
||||||
public:
|
|
||||||
ErrorCode error;
|
|
||||||
QString errorString;
|
|
||||||
|
|
||||||
ResponseError(const QRestReply *reply);
|
ResponseError(const QRestReply *reply);
|
||||||
|
ResponseError(const boost::system::system_error &e);
|
||||||
|
|
||||||
ResponseError(const boost::system::system_error &e)
|
const ErrorCode &error () { return m_error; }
|
||||||
: error(e.code())
|
const QString &errorString() { return m_errorString; }
|
||||||
, errorString(QString::fromUtf8(e.what()))
|
|
||||||
{
|
private:
|
||||||
assert(e.code());
|
ErrorCode m_error;
|
||||||
}
|
QString m_errorString;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
@ -21,6 +21,7 @@ target_link_libraries(${TARGET} PUBLIC
|
|||||||
)
|
)
|
||||||
target_link_libraries(${TARGET} PRIVATE
|
target_link_libraries(${TARGET} PRIVATE
|
||||||
QCoro6::Network
|
QCoro6::Network
|
||||||
|
date::date
|
||||||
fmt::fmt
|
fmt::fmt
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -13,7 +13,6 @@
|
|||||||
#include <QVariant>
|
#include <QVariant>
|
||||||
#include <QtAssert>
|
#include <QtAssert>
|
||||||
|
|
||||||
#include <coroutine>
|
|
||||||
#include <expected>
|
#include <expected>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
@ -28,13 +27,20 @@ namespace gpt4all::backend {
|
|||||||
ResponseError::ResponseError(const QRestReply *reply)
|
ResponseError::ResponseError(const QRestReply *reply)
|
||||||
{
|
{
|
||||||
if (reply->hasError()) {
|
if (reply->hasError()) {
|
||||||
error = reply->networkReply()->error();
|
m_error = reply->networkReply()->error();
|
||||||
} else if (!reply->isHttpStatusSuccess()) {
|
} else if (!reply->isHttpStatusSuccess()) {
|
||||||
error = BadStatus(reply->httpStatus());
|
m_error = BadStatus(reply->httpStatus());
|
||||||
} else
|
} else
|
||||||
Q_UNREACHABLE();
|
Q_UNREACHABLE();
|
||||||
|
|
||||||
errorString = restErrorString(*reply);
|
m_errorString = restErrorString(*reply);
|
||||||
|
}
|
||||||
|
|
||||||
|
ResponseError::ResponseError(const boost::system::system_error &e)
|
||||||
|
: m_error(e.code())
|
||||||
|
, m_errorString(QString::fromUtf8(e.what()))
|
||||||
|
{
|
||||||
|
Q_ASSERT(e.code());
|
||||||
}
|
}
|
||||||
|
|
||||||
QNetworkRequest OllamaClient::makeRequest(const QString &path) const
|
QNetworkRequest OllamaClient::makeRequest(const QString &path) const
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#include <fmt/chrono.h> // IWYU pragma: keep
|
#include <fmt/chrono.h> // IWYU pragma: keep
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
#include <date/date.h>
|
||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -40,7 +41,7 @@ Time tag_invoke(const json::value_to_tag<Time> &, const json::value &value)
|
|||||||
|
|
||||||
Time time;
|
Time time;
|
||||||
std::istringstream iss(json::string_view(value.as_string()));
|
std::istringstream iss(json::string_view(value.as_string()));
|
||||||
iss >> std::chrono::parse("%FT%T%Ez", time);
|
iss >> date::parse("%FT%T%Ez", time);
|
||||||
if (!iss && !iss.eof())
|
if (!iss && !iss.eof())
|
||||||
throw sys::system_error(std::make_error_code(std::errc::invalid_argument), __func__);
|
throw sys::system_error(std::make_error_code(std::errc::invalid_argument), __func__);
|
||||||
return time;
|
return time;
|
||||||
|
@ -227,9 +227,6 @@ if (APPLE)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
qt_add_executable(chat
|
qt_add_executable(chat
|
||||||
src/llmodel/provider.cpp src/llmodel/provider.h
|
|
||||||
src/llmodel/openai.cpp src/llmodel/openai.h
|
|
||||||
src/main.cpp
|
|
||||||
src/chat.cpp src/chat.h
|
src/chat.cpp src/chat.h
|
||||||
src/chatlistmodel.cpp src/chatlistmodel.h
|
src/chatlistmodel.cpp src/chatlistmodel.h
|
||||||
src/chatllm.cpp src/chatllm.h
|
src/chatllm.cpp src/chatllm.h
|
||||||
@ -241,14 +238,22 @@ qt_add_executable(chat
|
|||||||
src/embllm.cpp src/embllm.h
|
src/embllm.cpp src/embllm.h
|
||||||
src/jinja_helpers.cpp src/jinja_helpers.h
|
src/jinja_helpers.cpp src/jinja_helpers.h
|
||||||
src/jinja_replacements.cpp src/jinja_replacements.h
|
src/jinja_replacements.cpp src/jinja_replacements.h
|
||||||
|
src/json-helpers.cpp src/json-helpers.h
|
||||||
src/llm.cpp src/llm.h
|
src/llm.cpp src/llm.h
|
||||||
|
src/llmodel_chat.h
|
||||||
|
src/llmodel_ollama.cpp src/llmodel_ollama.h
|
||||||
|
src/llmodel_openai.cpp src/llmodel_openai.h
|
||||||
|
src/llmodel_provider.cpp src/llmodel_provider.h
|
||||||
src/localdocs.cpp src/localdocs.h
|
src/localdocs.cpp src/localdocs.h
|
||||||
src/localdocsmodel.cpp src/localdocsmodel.h
|
src/localdocsmodel.cpp src/localdocsmodel.h
|
||||||
src/logger.cpp src/logger.h
|
src/logger.cpp src/logger.h
|
||||||
|
src/main.cpp
|
||||||
src/modellist.cpp src/modellist.h
|
src/modellist.cpp src/modellist.h
|
||||||
src/mysettings.cpp src/mysettings.h
|
src/mysettings.cpp src/mysettings.h
|
||||||
src/network.cpp src/network.h
|
src/network.cpp src/network.h
|
||||||
src/server.cpp src/server.h
|
src/server.cpp src/server.h
|
||||||
|
src/store_base.cpp src/store_base.h
|
||||||
|
src/store_provider.cpp src/store_provider.h
|
||||||
src/tool.cpp src/tool.h
|
src/tool.cpp src/tool.h
|
||||||
src/toolcallparser.cpp src/toolcallparser.h
|
src/toolcallparser.cpp src/toolcallparser.h
|
||||||
src/toolmodel.cpp src/toolmodel.h
|
src/toolmodel.cpp src/toolmodel.h
|
||||||
@ -448,8 +453,9 @@ target_compile_definitions(chat PRIVATE QT_NO_SIGNALS_SLOTS_KEYWORDS)
|
|||||||
target_include_directories(chat PRIVATE deps/usearch/include
|
target_include_directories(chat PRIVATE deps/usearch/include
|
||||||
deps/usearch/fp16/include)
|
deps/usearch/fp16/include)
|
||||||
|
|
||||||
target_link_libraries(chat
|
target_link_libraries(chat PRIVATE
|
||||||
PRIVATE Qt6::Core Qt6::HttpServer Qt6::Quick Qt6::Sql Qt6::Svg)
|
Qt6::Core Qt6::HttpServer Qt6::Quick Qt6::Sql Qt6::Svg
|
||||||
|
)
|
||||||
if (GPT4ALL_USING_QTPDF)
|
if (GPT4ALL_USING_QTPDF)
|
||||||
target_compile_definitions(chat PRIVATE GPT4ALL_USE_QTPDF)
|
target_compile_definitions(chat PRIVATE GPT4ALL_USE_QTPDF)
|
||||||
target_link_libraries(chat PRIVATE Qt6::Pdf)
|
target_link_libraries(chat PRIVATE Qt6::Pdf)
|
||||||
@ -458,6 +464,7 @@ else()
|
|||||||
target_link_libraries(chat PRIVATE pdfium)
|
target_link_libraries(chat PRIVATE pdfium)
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(chat PRIVATE
|
target_link_libraries(chat PRIVATE
|
||||||
|
Boost::describe Boost::json Boost::system
|
||||||
QCoro6::Core QCoro6::Network
|
QCoro6::Core QCoro6::Network
|
||||||
QXlsx
|
QXlsx
|
||||||
SingleApplication
|
SingleApplication
|
||||||
@ -466,6 +473,7 @@ target_link_libraries(chat PRIVATE
|
|||||||
gpt4all-backend
|
gpt4all-backend
|
||||||
llmodel
|
llmodel
|
||||||
nlohmann_json::nlohmann_json
|
nlohmann_json::nlohmann_json
|
||||||
|
tl::generator
|
||||||
)
|
)
|
||||||
target_include_directories(chat PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/deps/minja/include)
|
target_include_directories(chat PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/deps/minja/include)
|
||||||
|
|
||||||
|
@ -14,6 +14,10 @@ add_subdirectory(QXlsx/QXlsx)
|
|||||||
|
|
||||||
add_subdirectory(json) # required by minja
|
add_subdirectory(json) # required by minja
|
||||||
|
|
||||||
|
# TartanLlama
|
||||||
|
set(FUNCTION_REF_ENABLE_TESTS OFF)
|
||||||
|
add_subdirectory(generator)
|
||||||
|
|
||||||
if (NOT GPT4ALL_USING_QTPDF)
|
if (NOT GPT4ALL_USING_QTPDF)
|
||||||
# If we do not use QtPDF, we need to get PDFium.
|
# If we do not use QtPDF, we need to get PDFium.
|
||||||
set(GPT4ALL_PDFIUM_TAG "chromium/6996")
|
set(GPT4ALL_PDFIUM_TAG "chromium/6996")
|
||||||
|
1
gpt4all-chat/deps/generator
Submodule
1
gpt4all-chat/deps/generator
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 2a912502de4f97dcba4f95c958ee0ddf7bc22cf5
|
@ -454,38 +454,6 @@ MySettingsTab {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MySettingsLabel {
|
|
||||||
id: nThreadsLabel
|
|
||||||
text: qsTr("CPU Threads")
|
|
||||||
helpText: qsTr("The number of CPU threads used for inference and embedding.")
|
|
||||||
Layout.row: 11
|
|
||||||
Layout.column: 0
|
|
||||||
}
|
|
||||||
MyTextField {
|
|
||||||
text: MySettings.threadCount
|
|
||||||
color: theme.textColor
|
|
||||||
font.pixelSize: theme.fontSizeLarge
|
|
||||||
Layout.alignment: Qt.AlignRight
|
|
||||||
Layout.row: 11
|
|
||||||
Layout.column: 2
|
|
||||||
Layout.minimumWidth: 200
|
|
||||||
Layout.maximumWidth: 200
|
|
||||||
validator: IntValidator {
|
|
||||||
bottom: 1
|
|
||||||
}
|
|
||||||
onEditingFinished: {
|
|
||||||
var val = parseInt(text)
|
|
||||||
if (!isNaN(val)) {
|
|
||||||
MySettings.threadCount = val
|
|
||||||
focus = false
|
|
||||||
} else {
|
|
||||||
text = MySettings.threadCount
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Accessible.role: Accessible.EditableText
|
|
||||||
Accessible.name: nThreadsLabel.text
|
|
||||||
Accessible.description: ToolTip.text
|
|
||||||
}
|
|
||||||
MySettingsLabel {
|
MySettingsLabel {
|
||||||
id: trayLabel
|
id: trayLabel
|
||||||
text: qsTr("Enable System Tray")
|
text: qsTr("Enable System Tray")
|
||||||
|
@ -791,53 +791,6 @@ MySettingsTab {
|
|||||||
Accessible.description: ToolTip.text
|
Accessible.description: ToolTip.text
|
||||||
}
|
}
|
||||||
|
|
||||||
MySettingsLabel {
|
|
||||||
id: batchSizeLabel
|
|
||||||
visible: !root.currentModelInfo.isOnline
|
|
||||||
text: qsTr("Prompt Batch Size")
|
|
||||||
helpText: qsTr("The batch size used for prompt processing.")
|
|
||||||
Layout.row: 1
|
|
||||||
Layout.column: 0
|
|
||||||
Layout.maximumWidth: 300 * theme.fontScale
|
|
||||||
}
|
|
||||||
MyTextField {
|
|
||||||
id: batchSizeField
|
|
||||||
visible: !root.currentModelInfo.isOnline
|
|
||||||
text: root.currentModelInfo.promptBatchSize
|
|
||||||
color: theme.textColor
|
|
||||||
font.pixelSize: theme.fontSizeLarge
|
|
||||||
ToolTip.text: qsTr("Amount of prompt tokens to process at once.\nNOTE: Higher values can speed up reading prompts but will use more RAM.")
|
|
||||||
ToolTip.visible: hovered
|
|
||||||
Layout.row: 1
|
|
||||||
Layout.column: 1
|
|
||||||
validator: IntValidator {
|
|
||||||
bottom: 1
|
|
||||||
}
|
|
||||||
Connections {
|
|
||||||
target: MySettings
|
|
||||||
function onPromptBatchSizeChanged() {
|
|
||||||
batchSizeField.text = root.currentModelInfo.promptBatchSize;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Connections {
|
|
||||||
target: root
|
|
||||||
function onCurrentModelInfoChanged() {
|
|
||||||
batchSizeField.text = root.currentModelInfo.promptBatchSize;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
onEditingFinished: {
|
|
||||||
var val = parseInt(text)
|
|
||||||
if (!isNaN(val)) {
|
|
||||||
MySettings.setModelPromptBatchSize(root.currentModelInfo, val)
|
|
||||||
focus = false
|
|
||||||
} else {
|
|
||||||
text = root.currentModelInfo.promptBatchSize
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Accessible.role: Accessible.EditableText
|
|
||||||
Accessible.name: batchSizeLabel.text
|
|
||||||
Accessible.description: ToolTip.text
|
|
||||||
}
|
|
||||||
MySettingsLabel {
|
MySettingsLabel {
|
||||||
id: repeatPenaltyLabel
|
id: repeatPenaltyLabel
|
||||||
visible: !root.currentModelInfo.isOnline
|
visible: !root.currentModelInfo.isOnline
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
|
|
||||||
static constexpr quint32 CHAT_FORMAT_MAGIC = 0xF5D553CC;
|
static constexpr quint32 CHAT_FORMAT_MAGIC = 0xF5D553CC;
|
||||||
static constexpr qint32 CHAT_FORMAT_VERSION = 12;
|
static constexpr qint32 CHAT_FORMAT_VERSION = 13;
|
||||||
|
|
||||||
class MyChatListModel: public ChatListModel { };
|
class MyChatListModel: public ChatListModel { };
|
||||||
Q_GLOBAL_STATIC(MyChatListModel, chatListModelInstance)
|
Q_GLOBAL_STATIC(MyChatListModel, chatListModelInstance)
|
||||||
|
@ -3,8 +3,9 @@
|
|||||||
#include "chat.h"
|
#include "chat.h"
|
||||||
#include "chatmodel.h"
|
#include "chatmodel.h"
|
||||||
#include "jinja_helpers.h"
|
#include "jinja_helpers.h"
|
||||||
#include "llmodel/chat.h"
|
#include "llmodel_chat.h"
|
||||||
#include "llmodel/openai.h"
|
#include "llmodel_description.h"
|
||||||
|
#include "llmodel_provider.h"
|
||||||
#include "localdocs.h"
|
#include "localdocs.h"
|
||||||
#include "mysettings.h"
|
#include "mysettings.h"
|
||||||
#include "network.h"
|
#include "network.h"
|
||||||
@ -12,6 +13,8 @@
|
|||||||
#include "toolcallparser.h"
|
#include "toolcallparser.h"
|
||||||
#include "toolmodel.h"
|
#include "toolmodel.h"
|
||||||
|
|
||||||
|
#include <QCoro/QCoroAsyncGenerator>
|
||||||
|
#include <QCoro/QCoroTask>
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
#include <gpt4all-backend/generation-params.h>
|
#include <gpt4all-backend/generation-params.h>
|
||||||
#include <minja/minja.hpp>
|
#include <minja/minja.hpp>
|
||||||
@ -118,10 +121,15 @@ public:
|
|||||||
virtual bool getStopGenerating () const = 0;
|
virtual bool getStopGenerating () const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct PromptModelWithToolsResult {
|
||||||
|
ChatResponseMetadata metadata;
|
||||||
|
QStringList toolCallBuffers;
|
||||||
|
bool shouldExecuteToolCall;
|
||||||
|
};
|
||||||
static auto promptModelWithTools(
|
static auto promptModelWithTools(
|
||||||
ChatLLModel *model, BaseResponseHandler &respHandler, const backend::GenerationParams ¶ms,
|
ChatLLMInstance *model, BaseResponseHandler &respHandler, const GenerationParams ¶ms, const QByteArray &prompt,
|
||||||
const QByteArray &prompt, const QStringList &toolNames
|
const QStringList &toolNames
|
||||||
) -> std::pair<QStringList, bool>
|
) -> QCoro::Task<PromptModelWithToolsResult>
|
||||||
{
|
{
|
||||||
ToolCallParser toolCallParser(toolNames);
|
ToolCallParser toolCallParser(toolNames);
|
||||||
auto handleResponse = [&toolCallParser, &respHandler](std::string_view piece) -> bool {
|
auto handleResponse = [&toolCallParser, &respHandler](std::string_view piece) -> bool {
|
||||||
@ -159,30 +167,31 @@ static auto promptModelWithTools(
|
|||||||
|
|
||||||
return !shouldExecuteToolCall && !respHandler.getStopGenerating();
|
return !shouldExecuteToolCall && !respHandler.getStopGenerating();
|
||||||
};
|
};
|
||||||
model->prompt(std::string_view(prompt), promptCallback, handleResponse, params);
|
ChatResponseMetadata metadata;
|
||||||
|
auto stream = model->generate(QString::fromUtf8(prompt), params, metadata);
|
||||||
|
QCORO_FOREACH(auto &piece, stream) {
|
||||||
|
if (!handleResponse(std::string_view(piece.toUtf8())))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete
|
const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete
|
||||||
&& toolCallParser.startTag() != ToolCallConstants::ThinkStartTag;
|
&& toolCallParser.startTag() != ToolCallConstants::ThinkStartTag;
|
||||||
|
|
||||||
return { toolCallParser.buffers(), shouldExecuteToolCall };
|
co_return { metadata, toolCallParser.buffers(), shouldExecuteToolCall };
|
||||||
}
|
}
|
||||||
|
|
||||||
class LLModelStore {
|
class LLModelStore {
|
||||||
public:
|
public:
|
||||||
static LLModelStore *globalInstance();
|
static LLModelStore *globalInstance();
|
||||||
|
|
||||||
LLModelInfo acquireModel(); // will block until llmodel is ready
|
auto acquireModel() -> std::unique_ptr<ChatLLMInstance>; // will block until llmodel is ready
|
||||||
void releaseModel(LLModelInfo &&info); // must be called when you are done
|
void releaseModel(std::unique_ptr<ChatLLMInstance> &&info); // must be called when you are done
|
||||||
void destroy();
|
void destroy();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
LLModelStore()
|
LLModelStore() { m_availableModel.emplace(); /* seed with empty model */ }
|
||||||
{
|
~LLModelStore() = default;
|
||||||
// seed with empty model
|
std::optional<std::unique_ptr<ChatLLMInstance>> m_availableModel;
|
||||||
m_availableModel = LLModelInfo();
|
|
||||||
}
|
|
||||||
~LLModelStore() {}
|
|
||||||
std::optional<LLModelInfo> m_availableModel;
|
|
||||||
QMutex m_mutex;
|
QMutex m_mutex;
|
||||||
QWaitCondition m_condition;
|
QWaitCondition m_condition;
|
||||||
friend class MyLLModelStore;
|
friend class MyLLModelStore;
|
||||||
@ -195,7 +204,7 @@ LLModelStore *LLModelStore::globalInstance()
|
|||||||
return storeInstance();
|
return storeInstance();
|
||||||
}
|
}
|
||||||
|
|
||||||
LLModelInfo LLModelStore::acquireModel()
|
auto LLModelStore::acquireModel() -> std::unique_ptr<ChatLLMInstance>
|
||||||
{
|
{
|
||||||
QMutexLocker locker(&m_mutex);
|
QMutexLocker locker(&m_mutex);
|
||||||
while (!m_availableModel)
|
while (!m_availableModel)
|
||||||
@ -205,7 +214,7 @@ LLModelInfo LLModelStore::acquireModel()
|
|||||||
return first;
|
return first;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LLModelStore::releaseModel(LLModelInfo &&info)
|
void LLModelStore::releaseModel(std::unique_ptr<ChatLLMInstance> &&info)
|
||||||
{
|
{
|
||||||
QMutexLocker locker(&m_mutex);
|
QMutexLocker locker(&m_mutex);
|
||||||
Q_ASSERT(!m_availableModel);
|
Q_ASSERT(!m_availableModel);
|
||||||
@ -219,11 +228,6 @@ void LLModelStore::destroy()
|
|||||||
m_availableModel.reset();
|
m_availableModel.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void LLModelInfo::resetModel(ChatLLM *cllm, ChatLLModel *model) {
|
|
||||||
this->model.reset(model);
|
|
||||||
emit cllm->loadedModelInfoChanged();
|
|
||||||
}
|
|
||||||
|
|
||||||
ChatLLM::ChatLLM(Chat *parent, bool isServer)
|
ChatLLM::ChatLLM(Chat *parent, bool isServer)
|
||||||
: QObject{nullptr}
|
: QObject{nullptr}
|
||||||
, m_chat(parent)
|
, m_chat(parent)
|
||||||
@ -264,9 +268,8 @@ void ChatLLM::destroy()
|
|||||||
|
|
||||||
// The only time we should have a model loaded here is on shutdown
|
// The only time we should have a model loaded here is on shutdown
|
||||||
// as we explicitly unload the model in all other circumstances
|
// as we explicitly unload the model in all other circumstances
|
||||||
if (isModelLoaded()) {
|
if (isModelLoaded())
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance.reset();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::destroyStore()
|
void ChatLLM::destroyStore()
|
||||||
@ -288,7 +291,7 @@ bool ChatLLM::loadDefaultModel()
|
|||||||
emit modelLoadingError(u"Could not find any model to load"_s);
|
emit modelLoadingError(u"Could not find any model to load"_s);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return loadModel(defaultModel);
|
return QCoro::waitFor(loadModel(defaultModel));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
|
void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
|
||||||
@ -305,24 +308,21 @@ void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
QString filePath = modelInfo.dirpath + modelInfo.filename();
|
|
||||||
QFileInfo fileInfo(filePath);
|
|
||||||
|
|
||||||
acquireModel();
|
acquireModel();
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llmInstance.get();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The store gave us no already loaded model, the wrong type of model, then give it back to the
|
// The store gave us no already loaded model, the wrong type of model, then give it back to the
|
||||||
// store and fail
|
// store and fail
|
||||||
if (!m_llModelInfo.model || m_llModelInfo.fileInfo != fileInfo || !m_shouldBeLoaded) {
|
if (!m_llmInstance || *m_llmInstance->description() != *modelInfo.modelDesc() || !m_shouldBeLoaded) {
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
emit trySwitchContextOfLoadedModelCompleted(0);
|
emit trySwitchContextOfLoadedModelCompleted(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "store had our model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
qDebug() << "store had our model" << m_llmThread.objectName() << m_llmInstance.model.get();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
emit trySwitchContextOfLoadedModelCompleted(2);
|
emit trySwitchContextOfLoadedModelCompleted(2);
|
||||||
@ -330,233 +330,119 @@ void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
|
|||||||
emit trySwitchContextOfLoadedModelCompleted(0);
|
emit trySwitchContextOfLoadedModelCompleted(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ChatLLM::loadModel(const ModelInfo &modelInfo)
|
// TODO: always call with a resource guard held since this didn't previously use coroutines
|
||||||
|
auto ChatLLM::loadModel(const ModelInfo &modelInfo) -> QCoro::Task<bool>
|
||||||
{
|
{
|
||||||
// This is a complicated method because N different possible threads are interested in the outcome
|
// TODO: get the description from somewhere
|
||||||
// of this method. Why? Because we have a main/gui thread trying to monitor the state of N different
|
bool alreadyAcquired = isModelLoaded();
|
||||||
// possible chat threads all vying for a single resource - the currently loaded model - as the user
|
if (alreadyAcquired && *modelInfo.modelDesc() == *m_modelInfo.modelDesc()) {
|
||||||
// switches back and forth between chats. It is important for our main/gui thread to never block
|
|
||||||
// but simultaneously always have up2date information with regards to which chat has the model loaded
|
|
||||||
// and what the type and name of that model is. I've tried to comment extensively in this method
|
|
||||||
// to provide an overview of what we're doing here.
|
|
||||||
|
|
||||||
if (isModelLoaded() && this->modelInfo() == modelInfo) {
|
|
||||||
// already acquired -> keep it
|
// already acquired -> keep it
|
||||||
return true; // already loaded
|
if (modelInfo != m_modelInfo) {
|
||||||
|
// switch to different clone of same model
|
||||||
|
Q_ASSERT(modelInfo.isClone() || m_modelInfo.isClone());
|
||||||
|
m_modelInfo = modelInfo;
|
||||||
|
}
|
||||||
|
co_return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset status
|
// reset status
|
||||||
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
||||||
emit modelLoadingError("");
|
emit modelLoadingError("");
|
||||||
|
|
||||||
QString filePath = modelInfo.dirpath + modelInfo.filename();
|
|
||||||
QFileInfo fileInfo(filePath);
|
|
||||||
|
|
||||||
// We have a live model, but it isn't the one we want
|
|
||||||
bool alreadyAcquired = isModelLoaded();
|
|
||||||
if (alreadyAcquired) {
|
if (alreadyAcquired) {
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
// we own a different model -> destroy it and load the requested one
|
||||||
qDebug() << "already acquired model deleted" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
m_llmInstance.reset();
|
||||||
#endif
|
} else if (!m_isServer) { // (the server loads models lazily rather than eagerly)
|
||||||
m_llModelInfo.resetModel(this);
|
// wait for the model to become available
|
||||||
} else if (!m_isServer) {
|
acquireModel(); // (blocks)
|
||||||
// This is a blocking call that tries to retrieve the model we need from the model store.
|
|
||||||
// If it succeeds, then we just have to restore state. If the store has never had a model
|
// check if request was canceled while we were waiting
|
||||||
// returned to it, then the modelInfo.model pointer should be null which will happen on startup
|
|
||||||
acquireModel();
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
|
||||||
qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
// At this point it is possible that while we were blocked waiting to acquire the model from the
|
|
||||||
// store, that our state was changed to not be loaded. If this is the case, release the model
|
|
||||||
// back into the store and quit loading
|
|
||||||
if (!m_shouldBeLoaded) {
|
if (!m_shouldBeLoaded) {
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
qDebug() << "no longer need model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
|
||||||
emit modelLoadingPercentageChanged(0.0f);
|
emit modelLoadingPercentageChanged(0.0f);
|
||||||
return false;
|
co_return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the store just gave us exactly the model we were looking for
|
// if it was the requested model, we are done
|
||||||
if (m_llModelInfo.model && m_llModelInfo.fileInfo == fileInfo) {
|
if (m_llmInstance && *m_llmInstance->description() == *modelInfo.modelDesc()) {
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
|
||||||
qDebug() << "store had our model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
emit modelLoadingPercentageChanged(1.0f);
|
emit modelLoadingPercentageChanged(1.0f);
|
||||||
setModelInfo(modelInfo);
|
setModelInfo(modelInfo);
|
||||||
Q_ASSERT(!m_modelInfo.filename().isEmpty());
|
Q_ASSERT(!m_modelInfo.filename().isEmpty());
|
||||||
if (m_modelInfo.filename().isEmpty())
|
if (m_modelInfo.filename().isEmpty())
|
||||||
emit modelLoadingError(u"Modelinfo is left null for %1"_s.arg(modelInfo.filename()));
|
emit modelLoadingError(u"Modelinfo is left null for %1"_s.arg(modelInfo.filename()));
|
||||||
return true;
|
co_return true;
|
||||||
} else {
|
|
||||||
// Release the memory since we have to switch to a different model.
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
|
||||||
qDebug() << "deleting model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
m_llModelInfo.resetModel(this);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// we own a different model -> destroy it and load the requested one
|
||||||
|
m_llmInstance.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Guarantee we've released the previous models memory
|
QVariantMap modelLoadProps;
|
||||||
Q_ASSERT(!m_llModelInfo.model);
|
if (!co_await loadNewModel(modelInfo, modelLoadProps))
|
||||||
|
co_return false; // m_shouldBeLoaded became false
|
||||||
|
|
||||||
// Store the file info in the modelInfo in case we have an error loading
|
emit modelLoadingPercentageChanged(isModelLoaded() ? 1.0f : 0.0f);
|
||||||
m_llModelInfo.fileInfo = fileInfo;
|
emit loadedModelInfoChanged();
|
||||||
|
|
||||||
if (fileInfo.exists()) {
|
modelLoadProps.insert("model", modelInfo.filename());
|
||||||
QVariantMap modelLoadProps;
|
Network::globalInstance()->trackChatEvent("model_load", modelLoadProps);
|
||||||
if (modelInfo.isOnline) {
|
|
||||||
QString apiKey;
|
|
||||||
QString requestUrl;
|
|
||||||
QString modelName;
|
|
||||||
{
|
|
||||||
QFile file(filePath);
|
|
||||||
bool success = file.open(QIODeviceBase::ReadOnly);
|
|
||||||
(void)success;
|
|
||||||
Q_ASSERT(success);
|
|
||||||
QJsonDocument doc = QJsonDocument::fromJson(file.readAll());
|
|
||||||
QJsonObject obj = doc.object();
|
|
||||||
apiKey = obj["apiKey"].toString();
|
|
||||||
modelName = obj["modelName"].toString();
|
|
||||||
if (modelInfo.isCompatibleApi) {
|
|
||||||
QString baseUrl(obj["baseUrl"].toString());
|
|
||||||
QUrl apiUrl(QUrl::fromUserInput(baseUrl));
|
|
||||||
if (!Network::isHttpUrlValid(apiUrl)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
QString currentPath(apiUrl.path());
|
|
||||||
QString suffixPath("%1/chat/completions");
|
|
||||||
apiUrl.setPath(suffixPath.arg(currentPath));
|
|
||||||
requestUrl = apiUrl.toString();
|
|
||||||
} else {
|
|
||||||
requestUrl = modelInfo.url();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m_llModelType = LLModelTypeV1::API;
|
|
||||||
ChatAPI *model = new ChatAPI();
|
|
||||||
model->setModelName(modelName);
|
|
||||||
model->setRequestURL(requestUrl);
|
|
||||||
model->setAPIKey(apiKey);
|
|
||||||
m_llModelInfo.resetModel(this, model);
|
|
||||||
} else if (!loadNewModel(modelInfo, modelLoadProps)) {
|
|
||||||
return false; // m_shouldBeLoaded became false
|
|
||||||
}
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
|
||||||
qDebug() << "new model" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
|
||||||
#endif
|
|
||||||
#if defined(DEBUG)
|
|
||||||
qDebug() << "modelLoadedChanged" << m_llmThread.objectName();
|
|
||||||
fflush(stdout);
|
|
||||||
#endif
|
|
||||||
emit modelLoadingPercentageChanged(isModelLoaded() ? 1.0f : 0.0f);
|
|
||||||
emit loadedModelInfoChanged();
|
|
||||||
|
|
||||||
modelLoadProps.insert("model", modelInfo.filename());
|
if (m_llmInstance)
|
||||||
Network::globalInstance()->trackChatEvent("model_load", modelLoadProps);
|
|
||||||
} else {
|
|
||||||
if (!m_isServer)
|
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo)); // release back into the store
|
|
||||||
resetModel();
|
|
||||||
emit modelLoadingError(u"Could not find file for model %1"_s.arg(modelInfo.filename()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_llModelInfo.model)
|
|
||||||
setModelInfo(modelInfo);
|
setModelInfo(modelInfo);
|
||||||
return bool(m_llModelInfo.model);
|
co_return bool(m_llmInstance);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns false if the model should no longer be loaded (!m_shouldBeLoaded).
|
/* Returns false if the model should no longer be loaded (!m_shouldBeLoaded).
|
||||||
* Otherwise returns true, even on error. */
|
* Otherwise returns true, even on error. */
|
||||||
bool ChatLLM::loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps)
|
auto ChatLLM::loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps) -> QCoro::Task<bool>
|
||||||
{
|
{
|
||||||
|
auto *mysettings = MySettings::globalInstance();
|
||||||
|
|
||||||
QElapsedTimer modelLoadTimer;
|
QElapsedTimer modelLoadTimer;
|
||||||
modelLoadTimer.start();
|
modelLoadTimer.start();
|
||||||
|
|
||||||
int n_ctx = MySettings::globalInstance()->modelContextLength(modelInfo);
|
// TODO: pass these as generation params
|
||||||
int ngl = MySettings::globalInstance()->modelGpuLayers(modelInfo);
|
int n_ctx = mysettings->modelContextLength(modelInfo);
|
||||||
|
int ngl = mysettings->modelGpuLayers (modelInfo);
|
||||||
|
|
||||||
std::string backend = "auto";
|
m_llmInstance = modelInfo.modelDesc()->newInstance(&m_nam);
|
||||||
QString filePath = modelInfo.dirpath + modelInfo.filename();
|
|
||||||
|
|
||||||
auto construct = [this, &filePath, &modelInfo, &modelLoadProps, n_ctx]() {
|
// TODO: progress callback
|
||||||
QString constructError;
|
#if 0
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance->setProgressCallback([this](float progress) -> bool {
|
||||||
auto *model = LLModel::Implementation::construct(filePath.toStdString(), "", n_ctx);
|
progress = std::max(progress, std::numeric_limits<float>::min()); // keep progress above zero
|
||||||
m_llModelInfo.resetModel(this, model);
|
emit modelLoadingPercentageChanged(progress);
|
||||||
|
return m_shouldBeLoaded;
|
||||||
if (!m_llModelInfo.model) {
|
});
|
||||||
if (!m_isServer)
|
#endif
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
co_await m_llmInstance->preload();
|
||||||
resetModel();
|
|
||||||
emit modelLoadingError(u"Error loading %1: %2"_s.arg(modelInfo.filename(), constructError));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_llModelInfo.model->setProgressCallback([this](float progress) -> bool {
|
|
||||||
progress = std::max(progress, std::numeric_limits<float>::min()); // keep progress above zero
|
|
||||||
emit modelLoadingPercentageChanged(progress);
|
|
||||||
return m_shouldBeLoaded;
|
|
||||||
});
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!construct())
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (m_llModelInfo.model->isModelBlacklisted(filePath.toStdString())) {
|
|
||||||
static QSet<QString> warned;
|
|
||||||
auto fname = modelInfo.filename();
|
|
||||||
if (!warned.contains(fname)) {
|
|
||||||
emit modelLoadingWarning(
|
|
||||||
u"%1 is known to be broken. Please get a replacement via the download dialog."_s.arg(fname)
|
|
||||||
);
|
|
||||||
warned.insert(fname); // don't warn again until restart
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, ngl);
|
|
||||||
|
|
||||||
if (!m_shouldBeLoaded) {
|
if (!m_shouldBeLoaded) {
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance.reset();
|
||||||
if (!m_isServer)
|
if (!m_isServer)
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
resetModel();
|
resetModel();
|
||||||
emit modelLoadingPercentageChanged(0.0f);
|
emit modelLoadingPercentageChanged(0.0f);
|
||||||
return false;
|
co_return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool success = true; // TODO: check for failure
|
||||||
if (!success) {
|
if (!success) {
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance.reset();
|
||||||
if (!m_isServer)
|
if (!m_isServer)
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
resetModel();
|
resetModel();
|
||||||
emit modelLoadingError(u"Could not load model due to invalid model file for %1"_s.arg(modelInfo.filename()));
|
emit modelLoadingError(u"Could not load model due to invalid model file for %1"_s.arg(modelInfo.filename()));
|
||||||
modelLoadProps.insert("error", "loadmodel_failed");
|
modelLoadProps.insert("error", "loadmodel_failed");
|
||||||
return true;
|
co_return true;
|
||||||
}
|
|
||||||
|
|
||||||
switch (m_llModelInfo.model->implementation().modelType()[0]) {
|
|
||||||
case 'L': m_llModelType = LLModelTypeV1::LLAMA; break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
m_llModelInfo.resetModel(this);
|
|
||||||
if (!m_isServer)
|
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
|
||||||
resetModel();
|
|
||||||
emit modelLoadingError(u"Could not determine model type for %1"_s.arg(modelInfo.filename()));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
modelLoadProps.insert("$duration", modelLoadTimer.elapsed() / 1000.);
|
modelLoadProps.insert("$duration", modelLoadTimer.elapsed() / 1000.);
|
||||||
return true;
|
co_return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ChatLLM::isModelLoaded() const
|
bool ChatLLM::isModelLoaded() const
|
||||||
{
|
{ return bool(m_llmInstance); }
|
||||||
return m_llModelInfo.model && m_llModelInfo.model->isModelLoaded();
|
|
||||||
}
|
|
||||||
|
|
||||||
static QString &removeLeadingWhitespace(QString &s)
|
static QString &removeLeadingWhitespace(QString &s)
|
||||||
{
|
{
|
||||||
@ -599,50 +485,34 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::acquireModel()
|
void ChatLLM::acquireModel()
|
||||||
{
|
{ m_llmInstance = LLModelStore::globalInstance()->acquireModel(); }
|
||||||
m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
|
|
||||||
emit loadedModelInfoChanged();
|
|
||||||
}
|
|
||||||
|
|
||||||
void ChatLLM::resetModel()
|
void ChatLLM::resetModel()
|
||||||
{
|
{ m_llmInstance.reset(); }
|
||||||
m_llModelInfo = {};
|
|
||||||
emit loadedModelInfoChanged();
|
|
||||||
}
|
|
||||||
|
|
||||||
void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
|
void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
|
||||||
{
|
{
|
||||||
// ignore attempts to switch to the same model twice
|
// ignore attempts to switch to the same model twice
|
||||||
if (!isModelLoaded() || this->modelInfo() != modelInfo) {
|
if (!isModelLoaded() || this->modelInfo() != modelInfo) {
|
||||||
m_shouldBeLoaded = true;
|
m_shouldBeLoaded = true;
|
||||||
loadModel(modelInfo);
|
QCoro::waitFor(loadModel(modelInfo));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static backend::GenerationParams genParamsFromSettings(const ModelInfo &modelInfo)
|
auto ChatLLM::modelDescription() -> const ModelDescription *
|
||||||
{
|
{ return m_llmInstance->description(); }
|
||||||
auto *mySettings = MySettings::globalInstance();
|
|
||||||
return {
|
|
||||||
.n_predict = mySettings->modelMaxLength (modelInfo),
|
|
||||||
.top_k = mySettings->modelTopK (modelInfo),
|
|
||||||
.top_p = float(mySettings->modelTopP (modelInfo)),
|
|
||||||
.min_p = float(mySettings->modelMinP (modelInfo)),
|
|
||||||
.temp = float(mySettings->modelTemperature (modelInfo)),
|
|
||||||
.n_batch = mySettings->modelPromptBatchSize (modelInfo),
|
|
||||||
.repeat_penalty = float(mySettings->modelRepeatPenalty(modelInfo)),
|
|
||||||
.repeat_last_n = mySettings->modelRepeatPenaltyTokens(modelInfo),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
void ChatLLM::prompt(const QStringList &enabledCollections)
|
void ChatLLM::prompt(const QStringList &enabledCollections)
|
||||||
{
|
{
|
||||||
|
auto *mySettings = MySettings::globalInstance();
|
||||||
|
|
||||||
if (!isModelLoaded()) {
|
if (!isModelLoaded()) {
|
||||||
emit responseStopped(0);
|
emit responseStopped(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
promptInternalChat(enabledCollections, genParamsFromSettings(m_modelInfo));
|
promptInternalChat(enabledCollections, mySettings->modelGenParams(m_modelInfo));
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
// FIXME(jared): this is neither translated nor serialized
|
// FIXME(jared): this is neither translated nor serialized
|
||||||
m_chatModel->setResponseValue(u"Error: %1"_s.arg(QString::fromUtf8(e.what())));
|
m_chatModel->setResponseValue(u"Error: %1"_s.arg(QString::fromUtf8(e.what())));
|
||||||
@ -706,7 +576,6 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const MessageItem> items) cons
|
|||||||
Q_ASSERT(items.size() >= 1);
|
Q_ASSERT(items.size() >= 1);
|
||||||
|
|
||||||
auto *mySettings = MySettings::globalInstance();
|
auto *mySettings = MySettings::globalInstance();
|
||||||
auto &model = m_llModelInfo.model;
|
|
||||||
|
|
||||||
QString chatTemplate, systemMessage;
|
QString chatTemplate, systemMessage;
|
||||||
auto chatTemplateSetting = mySettings->modelChatTemplate(m_modelInfo);
|
auto chatTemplateSetting = mySettings->modelChatTemplate(m_modelInfo);
|
||||||
@ -756,8 +625,11 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const MessageItem> items) cons
|
|||||||
{ "add_generation_prompt", true },
|
{ "add_generation_prompt", true },
|
||||||
{ "toolList", toolList },
|
{ "toolList", toolList },
|
||||||
};
|
};
|
||||||
for (auto &[name, token] : model->specialTokens())
|
// TODO: implement special tokens
|
||||||
|
#if 0
|
||||||
|
for (auto &[name, token] : m_llmInstance->specialTokens())
|
||||||
params.emplace(std::move(name), std::move(token));
|
params.emplace(std::move(name), std::move(token));
|
||||||
|
#endif
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto tmpl = loadJinjaTemplate(chatTemplate.toStdString());
|
auto tmpl = loadJinjaTemplate(chatTemplate.toStdString());
|
||||||
@ -769,7 +641,7 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const MessageItem> items) cons
|
|||||||
Q_UNREACHABLE();
|
Q_UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const backend::GenerationParams ¶ms,
|
auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const GenerationParams ¶ms,
|
||||||
qsizetype startOffset) -> ChatPromptResult
|
qsizetype startOffset) -> ChatPromptResult
|
||||||
{
|
{
|
||||||
Q_ASSERT(isModelLoaded());
|
Q_ASSERT(isModelLoaded());
|
||||||
@ -876,10 +748,9 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
auto ChatLLM::promptInternal(
|
auto ChatLLM::promptInternal(
|
||||||
const std::variant<std::span<const MessageItem>, std::string_view> &prompt,
|
const std::variant<std::span<const MessageItem>, std::string_view> &prompt, const GenerationParams ¶ms,
|
||||||
const backend::GenerationParams params,
|
|
||||||
bool usedLocalDocs
|
bool usedLocalDocs
|
||||||
) -> PromptResult
|
) -> QCoro::Task<PromptResult>
|
||||||
{
|
{
|
||||||
Q_ASSERT(isModelLoaded());
|
Q_ASSERT(isModelLoaded());
|
||||||
|
|
||||||
@ -897,22 +768,6 @@ auto ChatLLM::promptInternal(
|
|||||||
conversation = jinjaBuffer;
|
conversation = jinjaBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check for overlength last message
|
|
||||||
if (!dynamic_cast<const ChatAPI *>(m_llModelInfo.model.get())) {
|
|
||||||
auto nCtx = m_llModelInfo.model->contextLength();
|
|
||||||
std::string jinjaBuffer2;
|
|
||||||
auto lastMessageRendered = (messageItems && messageItems->size() > 1)
|
|
||||||
? std::string_view(jinjaBuffer2 = applyJinjaTemplate({ &messageItems->back(), 1 }))
|
|
||||||
: conversation;
|
|
||||||
int32_t lastMessageLength = m_llModelInfo.model->countPromptTokens(lastMessageRendered);
|
|
||||||
if (auto limit = nCtx - 4; lastMessageLength > limit) {
|
|
||||||
throw std::invalid_argument(
|
|
||||||
tr("Your message was too long and could not be processed (%1 > %2). "
|
|
||||||
"Please try again with something shorter.").arg(lastMessageLength).arg(limit).toUtf8().constData()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PromptResult result {};
|
PromptResult result {};
|
||||||
|
|
||||||
QElapsedTimer totalTime;
|
QElapsedTimer totalTime;
|
||||||
@ -920,16 +775,14 @@ auto ChatLLM::promptInternal(
|
|||||||
ChatViewResponseHandler respHandler(this, &totalTime, &result);
|
ChatViewResponseHandler respHandler(this, &totalTime, &result);
|
||||||
|
|
||||||
m_timer->start();
|
m_timer->start();
|
||||||
QStringList finalBuffers;
|
PromptModelWithToolsResult withToolsResult;
|
||||||
bool shouldExecuteTool;
|
|
||||||
try {
|
try {
|
||||||
emit promptProcessing();
|
emit promptProcessing();
|
||||||
m_llModelInfo.model->setThreadCount(mySettings->threadCount());
|
|
||||||
m_stopGenerating = false;
|
m_stopGenerating = false;
|
||||||
// TODO: set result.promptTokens based on ollama prompt_eval_count
|
// TODO: set result.promptTokens based on ollama prompt_eval_count
|
||||||
// TODO: support interruption via m_stopGenerating
|
// TODO: support interruption via m_stopGenerating
|
||||||
std::tie(finalBuffers, shouldExecuteTool) = promptModelWithTools(
|
withToolsResult = co_await promptModelWithTools(
|
||||||
m_llModelInfo.model.get(), handlePrompt, respHandler, params,
|
m_llmInstance.get(), respHandler, params,
|
||||||
QByteArray::fromRawData(conversation.data(), conversation.size()),
|
QByteArray::fromRawData(conversation.data(), conversation.size()),
|
||||||
ToolCallConstants::AllTagNames
|
ToolCallConstants::AllTagNames
|
||||||
);
|
);
|
||||||
@ -937,6 +790,8 @@ auto ChatLLM::promptInternal(
|
|||||||
m_timer->stop();
|
m_timer->stop();
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
// TODO: use metadata
|
||||||
|
auto &[metadata, finalBuffers, shouldExecuteTool] = withToolsResult;
|
||||||
|
|
||||||
m_timer->stop();
|
m_timer->stop();
|
||||||
qint64 elapsed = totalTime.elapsed();
|
qint64 elapsed = totalTime.elapsed();
|
||||||
@ -964,13 +819,13 @@ auto ChatLLM::promptInternal(
|
|||||||
else
|
else
|
||||||
emit responseStopped(elapsed);
|
emit responseStopped(elapsed);
|
||||||
|
|
||||||
return result;
|
co_return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::setShouldBeLoaded(bool b)
|
void ChatLLM::setShouldBeLoaded(bool b)
|
||||||
{
|
{
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "setShouldBeLoaded" << m_llmThread.objectName() << b << m_llModelInfo.model.get();
|
qDebug() << "setShouldBeLoaded" << m_llmThread.objectName() << b << m_llmInstance.model.get();
|
||||||
#endif
|
#endif
|
||||||
m_shouldBeLoaded = b; // atomic
|
m_shouldBeLoaded = b; // atomic
|
||||||
emit shouldBeLoadedChanged();
|
emit shouldBeLoadedChanged();
|
||||||
@ -1001,15 +856,15 @@ void ChatLLM::unloadModel()
|
|||||||
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
||||||
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "unloadModel" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
qDebug() << "unloadModel" << m_llmThread.objectName() << m_llmInstance.model.get();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (m_forceUnloadModel) {
|
if (m_forceUnloadModel) {
|
||||||
m_llModelInfo.resetModel(this);
|
m_llmInstance.reset();
|
||||||
m_forceUnloadModel = false;
|
m_forceUnloadModel = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
|
LLModelStore::globalInstance()->releaseModel(std::move(m_llmInstance));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::reloadModel()
|
void ChatLLM::reloadModel()
|
||||||
@ -1021,13 +876,13 @@ void ChatLLM::reloadModel()
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
#if defined(DEBUG_MODEL_LOADING)
|
#if defined(DEBUG_MODEL_LOADING)
|
||||||
qDebug() << "reloadModel" << m_llmThread.objectName() << m_llModelInfo.model.get();
|
qDebug() << "reloadModel" << m_llmThread.objectName() << m_llmInstance.model.get();
|
||||||
#endif
|
#endif
|
||||||
const ModelInfo m = modelInfo();
|
const ModelInfo m = modelInfo();
|
||||||
if (m.name().isEmpty())
|
if (m.name().isEmpty())
|
||||||
loadDefaultModel();
|
loadDefaultModel();
|
||||||
else
|
else
|
||||||
loadModel(m);
|
QCoro::waitFor(loadModel(m));
|
||||||
}
|
}
|
||||||
|
|
||||||
// This class throws discards the text within thinking tags, for use with chat names and follow-up questions.
|
// This class throws discards the text within thinking tags, for use with chat names and follow-up questions.
|
||||||
@ -1111,8 +966,8 @@ void ChatLLM::generateName()
|
|||||||
try {
|
try {
|
||||||
// TODO: support interruption via m_stopGenerating
|
// TODO: support interruption via m_stopGenerating
|
||||||
promptModelWithTools(
|
promptModelWithTools(
|
||||||
m_llModelInfo.model.get(),
|
m_llmInstance.get(),
|
||||||
respHandler, genParamsFromSettings(m_modelInfo),
|
respHandler, mySettings->modelGenParams(m_modelInfo),
|
||||||
applyJinjaTemplate(forkConversation(chatNamePrompt)).c_str(),
|
applyJinjaTemplate(forkConversation(chatNamePrompt)).c_str(),
|
||||||
{ ToolCallConstants::ThinkTagName }
|
{ ToolCallConstants::ThinkTagName }
|
||||||
);
|
);
|
||||||
@ -1187,8 +1042,8 @@ void ChatLLM::generateQuestions(qint64 elapsed)
|
|||||||
try {
|
try {
|
||||||
// TODO: support interruption via m_stopGenerating
|
// TODO: support interruption via m_stopGenerating
|
||||||
promptModelWithTools(
|
promptModelWithTools(
|
||||||
m_llModelInfo.model.get(),
|
m_llmInstance.get(),
|
||||||
respHandler, genParamsFromSettings(m_modelInfo),
|
respHandler, mySettings->modelGenParams(m_modelInfo),
|
||||||
applyJinjaTemplate(forkConversation(suggestedFollowUpPrompt)).c_str(),
|
applyJinjaTemplate(forkConversation(suggestedFollowUpPrompt)).c_str(),
|
||||||
{ ToolCallConstants::ThinkTagName }
|
{ ToolCallConstants::ThinkTagName }
|
||||||
);
|
);
|
||||||
@ -1199,39 +1054,13 @@ void ChatLLM::generateQuestions(qint64 elapsed)
|
|||||||
emit responseStopped(elapsed);
|
emit responseStopped(elapsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// this function serialized the cached model state to disk.
|
|
||||||
// we want to also serialize n_ctx, and read it at load time.
|
|
||||||
bool ChatLLM::serialize(QDataStream &stream, int version)
|
bool ChatLLM::serialize(QDataStream &stream, int version)
|
||||||
{
|
{
|
||||||
if (version < 11) {
|
static constexpr int VERSION_MIN = 13;
|
||||||
if (version >= 6) {
|
if (version < VERSION_MIN)
|
||||||
stream << false; // serializeKV
|
throw std::runtime_error(fmt::format("ChatLLM does not support serializing as version {} (min is {})",
|
||||||
}
|
version, VERSION_MIN));
|
||||||
if (version >= 2) {
|
// nothing to do here; ChatLLM doesn't serialize any state itself anymore
|
||||||
if (m_llModelType == LLModelTypeV1::NONE) {
|
|
||||||
qWarning() << "ChatLLM ERROR: attempted to serialize a null model for chat id" << m_chat->id()
|
|
||||||
<< "name" << m_chat->name();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
stream << m_llModelType;
|
|
||||||
stream << 0; // state version
|
|
||||||
}
|
|
||||||
{
|
|
||||||
QString dummy;
|
|
||||||
stream << dummy; // response
|
|
||||||
stream << dummy; // generated name
|
|
||||||
}
|
|
||||||
stream << quint32(0); // prompt + response tokens
|
|
||||||
|
|
||||||
if (version < 6) { // serialize binary state
|
|
||||||
if (version < 4) {
|
|
||||||
stream << 0; // responseLogits
|
|
||||||
}
|
|
||||||
stream << int32_t(0); // n_past
|
|
||||||
stream << quint64(0); // input token count
|
|
||||||
stream << QByteArray(); // KV cache state
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return stream.status() == QDataStream::Ok;
|
return stream.status() == QDataStream::Ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#include "chatmodel.h"
|
#include "chatmodel.h"
|
||||||
#include "database.h"
|
#include "database.h"
|
||||||
#include "llmodel/chat.h"
|
#include "llmodel_chat.h"
|
||||||
#include "modellist.h"
|
#include "modellist.h"
|
||||||
|
|
||||||
#include <QByteArray>
|
#include <QByteArray>
|
||||||
@ -31,6 +31,7 @@ using namespace Qt::Literals::StringLiterals;
|
|||||||
|
|
||||||
class ChatLLM;
|
class ChatLLM;
|
||||||
class QDataStream;
|
class QDataStream;
|
||||||
|
namespace QCoro { template <typename T> class Task; }
|
||||||
|
|
||||||
|
|
||||||
// NOTE: values serialized to disk, do not change or reuse
|
// NOTE: values serialized to disk, do not change or reuse
|
||||||
@ -89,12 +90,6 @@ inline LLModelTypeV1 parseLLModelTypeV0(int v0)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct LLModelInfo {
|
|
||||||
std::unique_ptr<gpt4all::ui::ChatLLModel> model;
|
|
||||||
QFileInfo fileInfo;
|
|
||||||
void resetModel(ChatLLM *cllm, gpt4all::ui::ChatLLModel *model = nullptr);
|
|
||||||
};
|
|
||||||
|
|
||||||
class TokenTimer : public QObject {
|
class TokenTimer : public QObject {
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
public:
|
public:
|
||||||
@ -173,7 +168,7 @@ public Q_SLOTS:
|
|||||||
void prompt(const QStringList &enabledCollections);
|
void prompt(const QStringList &enabledCollections);
|
||||||
bool loadDefaultModel();
|
bool loadDefaultModel();
|
||||||
void trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
|
void trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
|
||||||
bool loadModel(const ModelInfo &modelInfo);
|
auto loadModel(const ModelInfo &modelInfo) -> QCoro::Task<bool>;
|
||||||
void modelChangeRequested(const ModelInfo &modelInfo);
|
void modelChangeRequested(const ModelInfo &modelInfo);
|
||||||
void unloadModel();
|
void unloadModel();
|
||||||
void reloadModel();
|
void reloadModel();
|
||||||
@ -215,14 +210,16 @@ protected:
|
|||||||
QList<ResultInfo> databaseResults;
|
QList<ResultInfo> databaseResults;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto promptInternalChat(const QStringList &enabledCollections, const gpt4all::backend::GenerationParams ¶ms,
|
auto modelDescription() -> const gpt4all::ui::ModelDescription *;
|
||||||
|
|
||||||
|
auto promptInternalChat(const QStringList &enabledCollections, const gpt4all::ui::GenerationParams ¶ms,
|
||||||
qsizetype startOffset = 0) -> ChatPromptResult;
|
qsizetype startOffset = 0) -> ChatPromptResult;
|
||||||
// passing a string_view directly skips templating and uses the raw string
|
// passing a string_view directly skips templating and uses the raw string
|
||||||
auto promptInternal(const std::variant<std::span<const MessageItem>, std::string_view> &prompt,
|
auto promptInternal(const std::variant<std::span<const MessageItem>, std::string_view> &prompt,
|
||||||
const gpt4all::backend::GenerationParams ¶ms, bool usedLocalDocs) -> PromptResult;
|
const gpt4all::ui::GenerationParams ¶ms, bool usedLocalDocs) -> QCoro::Task<PromptResult>;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);
|
auto loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps) -> QCoro::Task<bool>;
|
||||||
|
|
||||||
std::vector<MessageItem> forkConversation(const QString &prompt) const;
|
std::vector<MessageItem> forkConversation(const QString &prompt) const;
|
||||||
|
|
||||||
@ -237,11 +234,11 @@ protected:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
const Chat *m_chat;
|
const Chat *m_chat;
|
||||||
LLModelInfo m_llModelInfo;
|
std::unique_ptr<gpt4all::ui::ChatLLMInstance> m_llmInstance;
|
||||||
LLModelTypeV1 m_llModelType = LLModelTypeV1::NONE;
|
|
||||||
ModelInfo m_modelInfo;
|
ModelInfo m_modelInfo;
|
||||||
TokenTimer *m_timer;
|
TokenTimer *m_timer;
|
||||||
QThread m_llmThread;
|
QThread m_llmThread;
|
||||||
|
QNetworkAccessManager m_nam; // TODO(jared): avoid making multiple thread pools
|
||||||
std::atomic<bool> m_stopGenerating;
|
std::atomic<bool> m_stopGenerating;
|
||||||
std::atomic<bool> m_shouldBeLoaded;
|
std::atomic<bool> m_shouldBeLoaded;
|
||||||
std::atomic<bool> m_forceUnloadModel;
|
std::atomic<bool> m_forceUnloadModel;
|
||||||
|
@ -110,10 +110,6 @@ bool EmbeddingLLMWorker::loadModel()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME(jared): the user may want this to take effect without having to restart
|
|
||||||
int n_threads = MySettings::globalInstance()->threadCount();
|
|
||||||
m_model->setThreadCount(n_threads);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
46
gpt4all-chat/src/json-helpers.cpp
Normal file
46
gpt4all-chat/src/json-helpers.cpp
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
#include "json-helpers.h"
|
||||||
|
|
||||||
|
#include <boost/json.hpp> // IWYU pragma: keep
|
||||||
|
#include <boost/system.hpp> // IWYU pragma: keep
|
||||||
|
#include <gpt4all-backend/json-helpers.h>
|
||||||
|
|
||||||
|
#include <QByteArray>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QUuid>
|
||||||
|
#include <QtAssert>
|
||||||
|
|
||||||
|
#include <system_error>
|
||||||
|
|
||||||
|
namespace json = boost::json;
|
||||||
|
namespace sys = boost::system;
|
||||||
|
|
||||||
|
|
||||||
|
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QUuid &uuid)
|
||||||
|
{
|
||||||
|
auto bytes = uuid.toRfc4122().toBase64();
|
||||||
|
value = json::value_from(json::string_view(bytes.data(), bytes.size()));
|
||||||
|
}
|
||||||
|
|
||||||
|
QUuid tag_invoke(const boost::json::value_to_tag<QUuid> &, const boost::json::value &value)
|
||||||
|
{
|
||||||
|
auto &s = value.as_string();
|
||||||
|
auto bytes = QByteArray::fromRawData(s.data(), s.size());
|
||||||
|
auto result = QByteArray::fromBase64Encoding(bytes);
|
||||||
|
if (!result)
|
||||||
|
throw sys::system_error(std::make_error_code(std::errc::invalid_argument), __func__);
|
||||||
|
auto uuid = QUuid::fromRfc4122(result.decoded);
|
||||||
|
Q_ASSERT(!uuid.isNull()); // this may fail if the user manually creates a null UUID
|
||||||
|
return uuid;
|
||||||
|
}
|
||||||
|
|
||||||
|
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QUrl &url)
|
||||||
|
{
|
||||||
|
auto bytes = url.toEncoded();
|
||||||
|
value = json::value_from(json::string_view(bytes.data(), bytes.size()));
|
||||||
|
}
|
||||||
|
|
||||||
|
QUrl tag_invoke(const boost::json::value_to_tag<QUrl> &, const boost::json::value &value)
|
||||||
|
{
|
||||||
|
auto &s = value.as_string();
|
||||||
|
return QUrl::fromEncoded(QByteArray::fromRawData(s.data(), s.size()));
|
||||||
|
}
|
15
gpt4all-chat/src/json-helpers.h
Normal file
15
gpt4all-chat/src/json-helpers.h
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
class QUrl;
|
||||||
|
class QUuid;
|
||||||
|
namespace boost::json {
|
||||||
|
class value;
|
||||||
|
struct value_from_tag;
|
||||||
|
template <typename T> struct value_to_tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QUuid &uuid);
|
||||||
|
QUuid tag_invoke(const boost::json::value_to_tag<QUuid> &, const boost::json::value &value);
|
||||||
|
|
||||||
|
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QUrl &url);
|
||||||
|
QUrl tag_invoke(const boost::json::value_to_tag<QUrl> &, const boost::json::value &value);
|
@ -1,32 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <QStringView>
|
|
||||||
|
|
||||||
class QString;
|
|
||||||
namespace QCoro { template <typename T> class AsyncGenerator; }
|
|
||||||
namespace gpt4all::backend { struct GenerationParams; }
|
|
||||||
|
|
||||||
|
|
||||||
namespace gpt4all::ui {
|
|
||||||
|
|
||||||
|
|
||||||
struct ChatResponseMetadata {
|
|
||||||
int nPromptTokens;
|
|
||||||
int nResponseTokens;
|
|
||||||
};
|
|
||||||
|
|
||||||
// TODO: implement two of these; one based on Ollama (TBD) and the other based on OpenAI (chatapi.h)
|
|
||||||
class ChatLLModel {
|
|
||||||
public:
|
|
||||||
virtual ~ChatLLModel() = 0;
|
|
||||||
|
|
||||||
[[nodiscard]]
|
|
||||||
virtual QString name() = 0;
|
|
||||||
|
|
||||||
virtual void preload() = 0;
|
|
||||||
virtual auto chat(QStringView prompt, const backend::GenerationParams ¶ms,
|
|
||||||
/*out*/ ChatResponseMetadata &metadata) -> QCoro::AsyncGenerator<QString> = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace gpt4all::ui
|
|
@ -1,75 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "chat.h"
|
|
||||||
#include "provider.h"
|
|
||||||
|
|
||||||
#include <QObject>
|
|
||||||
#include <QQmlEngine>
|
|
||||||
#include <QString>
|
|
||||||
#include <QUrl>
|
|
||||||
|
|
||||||
class QNetworkAccessManager;
|
|
||||||
|
|
||||||
|
|
||||||
namespace gpt4all::ui {
|
|
||||||
|
|
||||||
|
|
||||||
class OpenaiModelDescription : public QObject {
|
|
||||||
Q_OBJECT
|
|
||||||
QML_ELEMENT
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit OpenaiModelDescription(OpenaiProvider *provider, QString displayName, QString modelName)
|
|
||||||
: QObject(provider)
|
|
||||||
, m_provider(provider)
|
|
||||||
, m_displayName(std::move(displayName))
|
|
||||||
, m_modelName(std::move(modelName))
|
|
||||||
{}
|
|
||||||
|
|
||||||
// getters
|
|
||||||
[[nodiscard]] OpenaiProvider *provider () const { return m_provider; }
|
|
||||||
[[nodiscard]] const QString &displayName() const { return m_displayName; }
|
|
||||||
[[nodiscard]] const QString &modelName () const { return m_modelName; }
|
|
||||||
|
|
||||||
// setters
|
|
||||||
void setDisplayName(QString value);
|
|
||||||
void setModelName (QString value);
|
|
||||||
|
|
||||||
Q_SIGNALS:
|
|
||||||
void displayNameChanged(const QString &value);
|
|
||||||
void modelNameChanged (const QString &value);
|
|
||||||
|
|
||||||
private:
|
|
||||||
OpenaiProvider *m_provider;
|
|
||||||
QString m_displayName;
|
|
||||||
QString m_modelName;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct OpenaiConnectionDetails {
|
|
||||||
QUrl baseUrl;
|
|
||||||
QString modelName;
|
|
||||||
QString apiKey;
|
|
||||||
|
|
||||||
OpenaiConnectionDetails(const OpenaiModelDescription *desc)
|
|
||||||
: baseUrl(desc->provider()->baseUrl())
|
|
||||||
, apiKey(desc->provider()->apiKey())
|
|
||||||
, modelName(desc->modelName())
|
|
||||||
{}
|
|
||||||
};
|
|
||||||
|
|
||||||
class OpenaiLLModel : public ChatLLModel {
|
|
||||||
public:
|
|
||||||
explicit OpenaiLLModel(OpenaiConnectionDetails connDetails, QNetworkAccessManager *nam);
|
|
||||||
|
|
||||||
void preload() override { /* not supported -> no-op */ }
|
|
||||||
|
|
||||||
auto chat(QStringView prompt, const backend::GenerationParams ¶ms, /*out*/ ChatResponseMetadata &metadata)
|
|
||||||
-> QCoro::AsyncGenerator<QString> override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
OpenaiConnectionDetails m_connDetails;
|
|
||||||
QNetworkAccessManager *m_nam;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace gpt4all::ui
|
|
@ -1,26 +0,0 @@
|
|||||||
#include "provider.h"
|
|
||||||
|
|
||||||
#include <utility>
|
|
||||||
|
|
||||||
|
|
||||||
namespace gpt4all::ui {
|
|
||||||
|
|
||||||
|
|
||||||
void OpenaiProvider::setBaseUrl(QUrl value)
|
|
||||||
{
|
|
||||||
if (m_baseUrl != value) {
|
|
||||||
m_baseUrl = std::move(value);
|
|
||||||
emit baseUrlChanged(m_baseUrl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void OpenaiProvider::setApiKey(QString value)
|
|
||||||
{
|
|
||||||
if (m_apiKey != value) {
|
|
||||||
m_apiKey = std::move(value);
|
|
||||||
emit apiKeyChanged(m_apiKey);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace gpt4all::ui
|
|
@ -1,47 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <QObject>
|
|
||||||
#include <QQmlEngine>
|
|
||||||
#include <QString>
|
|
||||||
#include <QUrl>
|
|
||||||
|
|
||||||
|
|
||||||
namespace gpt4all::ui {
|
|
||||||
|
|
||||||
|
|
||||||
class ModelProvider : public QObject {
|
|
||||||
Q_OBJECT
|
|
||||||
|
|
||||||
Q_PROPERTY(QString name READ name CONSTANT)
|
|
||||||
|
|
||||||
public:
|
|
||||||
[[nodiscard]] virtual QString name() = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
class OpenaiProvider : public ModelProvider {
|
|
||||||
Q_OBJECT
|
|
||||||
QML_ELEMENT
|
|
||||||
|
|
||||||
Q_PROPERTY(QUrl baseUrl READ baseUrl WRITE setBaseUrl NOTIFY baseUrlChanged)
|
|
||||||
Q_PROPERTY(QString apiKey READ apiKey WRITE setApiKey NOTIFY apiKeyChanged)
|
|
||||||
|
|
||||||
public:
|
|
||||||
[[nodiscard]] QString name() override { return m_name; }
|
|
||||||
[[nodiscard]] const QUrl &baseUrl() { return m_baseUrl; }
|
|
||||||
[[nodiscard]] const QString &apiKey () { return m_apiKey; }
|
|
||||||
|
|
||||||
void setBaseUrl(QUrl value);
|
|
||||||
void setApiKey (QString value);
|
|
||||||
|
|
||||||
Q_SIGNALS:
|
|
||||||
void baseUrlChanged(const QUrl &value);
|
|
||||||
void apiKeyChanged (const QString &value);
|
|
||||||
|
|
||||||
private:
|
|
||||||
QString m_name;
|
|
||||||
QUrl m_baseUrl;
|
|
||||||
QString m_apiKey;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace gpt4all::ui
|
|
34
gpt4all-chat/src/llmodel_chat.h
Normal file
34
gpt4all-chat/src/llmodel_chat.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
class QString;
|
||||||
|
class QStringView;
|
||||||
|
namespace QCoro {
|
||||||
|
template <typename T> class AsyncGenerator;
|
||||||
|
template <typename T> class Task;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class GenerationParams;
|
||||||
|
class ModelDescription;
|
||||||
|
|
||||||
|
struct ChatResponseMetadata {
|
||||||
|
int nPromptTokens;
|
||||||
|
int nResponseTokens;
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: implement two of these; one based on Ollama (TBD) and the other based on OpenAI (chatapi.h)
|
||||||
|
class ChatLLMInstance {
|
||||||
|
public:
|
||||||
|
virtual ~ChatLLMInstance() = 0;
|
||||||
|
|
||||||
|
virtual auto description() const -> const ModelDescription * = 0;
|
||||||
|
virtual auto preload() -> QCoro::Task<void> = 0;
|
||||||
|
virtual auto generate(QStringView prompt, const GenerationParams ¶ms, /*out*/ ChatResponseMetadata &metadata)
|
||||||
|
-> QCoro::AsyncGenerator<QString> = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
22
gpt4all-chat/src/llmodel_description.cpp
Normal file
22
gpt4all-chat/src/llmodel_description.cpp
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
#include "llmodel_description.h"
|
||||||
|
|
||||||
|
#include "llmodel_chat.h"
|
||||||
|
#include "llmodel_provider.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
auto ModelDescription::newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<ChatLLMInstance>
|
||||||
|
{ return std::unique_ptr<ChatLLMInstance>(newInstanceImpl(nam)); }
|
||||||
|
|
||||||
|
bool operator==(const ModelDescription &a, const ModelDescription &b)
|
||||||
|
{
|
||||||
|
if (typeid(a) != typeid(b))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return *a.provider() == *b.provider() && a.key() == b.key();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
40
gpt4all-chat/src/llmodel_description.h
Normal file
40
gpt4all-chat/src/llmodel_description.h
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <QObject>
|
||||||
|
#include <QVariant>
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
class QNetworkAccessManager;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class ChatLLMInstance;
|
||||||
|
class ModelProvider;
|
||||||
|
|
||||||
|
// TODO: implement shared_from_this guidance for restricted construction
|
||||||
|
class ModelDescription : public std::enable_shared_from_this<ModelDescription> {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(const ModelProvider *provider READ provider CONSTANT)
|
||||||
|
Q_PROPERTY(QVariant key READ key CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual ~ModelDescription() noexcept = 0;
|
||||||
|
|
||||||
|
// getters
|
||||||
|
[[nodiscard]] virtual auto provider() const -> const ModelProvider * = 0;
|
||||||
|
[[nodiscard]] virtual QVariant key () const = 0;
|
||||||
|
|
||||||
|
/// create an instance to chat with
|
||||||
|
[[nodiscard]] auto newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<ChatLLMInstance>;
|
||||||
|
|
||||||
|
friend bool operator==(const ModelDescription &a, const ModelDescription &b);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
[[nodiscard]] virtual auto newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance * = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
82
gpt4all-chat/src/llmodel_ollama.cpp
Normal file
82
gpt4all-chat/src/llmodel_ollama.cpp
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
#include "llmodel_ollama.h"
|
||||||
|
|
||||||
|
#include <QCoro/QCoroAsyncGenerator>
|
||||||
|
#include <QCoro/QCoroTask>
|
||||||
|
|
||||||
|
using namespace Qt::Literals::StringLiterals;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
void OllamaGenerationParams::parseInner(QMap<GenerationParam, QVariant> &values)
|
||||||
|
{
|
||||||
|
tryParseValue(values, GenerationParam::NPredict, &OllamaGenerationParams::n_predict);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OllamaGenerationParams::toMap() const -> QMap<QLatin1StringView, QVariant>
|
||||||
|
{
|
||||||
|
return {
|
||||||
|
{ "n_predict"_L1, n_predict },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OllamaProvider::supportedGenerationParams() const -> QSet<GenerationParam>
|
||||||
|
{
|
||||||
|
using enum GenerationParam;
|
||||||
|
return { NPredict };
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OllamaProvider::makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const
|
||||||
|
-> OllamaGenerationParams *
|
||||||
|
{ return new OllamaGenerationParams(values); }
|
||||||
|
|
||||||
|
/// load
|
||||||
|
OllamaProviderCustom::OllamaProviderCustom(std::shared_ptr<ProviderStore> store, QUuid id)
|
||||||
|
: ModelProvider(std::move(id))
|
||||||
|
, ModelProviderCustom(std::move(store))
|
||||||
|
{ load(); }
|
||||||
|
|
||||||
|
/// create
|
||||||
|
OllamaProviderCustom::OllamaProviderCustom(std::shared_ptr<ProviderStore> store, QString name, QUrl baseUrl)
|
||||||
|
: ModelProvider(std::move(name), std::move(baseUrl))
|
||||||
|
, ModelProviderCustom(std::move(store))
|
||||||
|
{
|
||||||
|
auto data = m_store->create(m_name, m_baseUrl);
|
||||||
|
if (!data)
|
||||||
|
data.error().raise();
|
||||||
|
m_id = (*data)->id;
|
||||||
|
}
|
||||||
|
|
||||||
|
OllamaModelDescription::OllamaModelDescription(std::shared_ptr<const OllamaProvider> provider, QByteArray modelHash)
|
||||||
|
: m_provider(std::move(provider))
|
||||||
|
, m_modelHash(std::move(modelHash))
|
||||||
|
{}
|
||||||
|
|
||||||
|
auto OllamaModelDescription::newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<OllamaChatModel>
|
||||||
|
{ return std::unique_ptr<OllamaChatModel>(&dynamic_cast<OllamaChatModel &>(*newInstanceImpl(nam))); }
|
||||||
|
|
||||||
|
auto OllamaModelDescription::newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance *
|
||||||
|
{ return new OllamaChatModel({ shared_from_this(), this }, nam); }
|
||||||
|
|
||||||
|
OllamaChatModel::OllamaChatModel(std::shared_ptr<const OllamaModelDescription> description, QNetworkAccessManager *nam)
|
||||||
|
: m_description(std::move(description))
|
||||||
|
, m_nam(nam)
|
||||||
|
{}
|
||||||
|
|
||||||
|
auto OllamaChatModel::preload() -> QCoro::Task<>
|
||||||
|
{
|
||||||
|
// TODO: implement
|
||||||
|
co_return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OllamaChatModel::generate(QStringView prompt, const GenerationParams ¶ms,
|
||||||
|
/*out*/ ChatResponseMetadata &metadata)
|
||||||
|
-> QCoro::AsyncGenerator<QString>
|
||||||
|
{
|
||||||
|
// TODO: implement
|
||||||
|
co_yield QStringLiteral("(TODO: response from ollama)");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
122
gpt4all-chat/src/llmodel_ollama.h
Normal file
122
gpt4all-chat/src/llmodel_ollama.h
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "llmodel_chat.h"
|
||||||
|
#include "llmodel_description.h"
|
||||||
|
#include "llmodel_provider.h"
|
||||||
|
|
||||||
|
#include <QByteArray>
|
||||||
|
#include <QLatin1StringView> // IWYU pragma: keep
|
||||||
|
#include <QObject>
|
||||||
|
#include <QString>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QVariant>
|
||||||
|
#include <QtTypes> // IWYU pragma: keep
|
||||||
|
|
||||||
|
class QNetworkAccessManager;
|
||||||
|
template <typename Key, typename T> class QMap;
|
||||||
|
template <typename T> class QSet;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaChatModel;
|
||||||
|
|
||||||
|
struct OllamaGenerationParamsData {
|
||||||
|
uint n_predict;
|
||||||
|
// TODO(jared): include ollama-specific generation params
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaGenerationParams : public GenerationParams, public OllamaGenerationParamsData {
|
||||||
|
public:
|
||||||
|
explicit OllamaGenerationParams(QMap<GenerationParam, QVariant> values) { parse(std::move(values)); }
|
||||||
|
auto toMap() const -> QMap<QLatin1StringView, QVariant> override;
|
||||||
|
bool isNoop() const override { return !n_predict; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void parseInner(QMap<GenerationParam, QVariant> &values) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaProvider : public QObject, public virtual ModelProvider {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
~OllamaProvider() noexcept override = 0;
|
||||||
|
|
||||||
|
QObject *asQObject() override { return this; }
|
||||||
|
const QObject *asQObject() const override { return this; }
|
||||||
|
|
||||||
|
auto supportedGenerationParams() const -> QSet<GenerationParam> override;
|
||||||
|
auto makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const -> OllamaGenerationParams * override;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaProviderBuiltin : public ModelProviderBuiltin, public OllamaProvider {
|
||||||
|
Q_GADGET
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Create a new built-in Ollama provider (transient).
|
||||||
|
explicit OllamaProviderBuiltin(QUuid id, QString name, QUrl baseUrl)
|
||||||
|
: ModelProvider(std::move(id), std::move(name), std::move(baseUrl)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaProviderCustom final : public OllamaProvider, public ModelProviderCustom {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Load an existing OllamaProvider from disk.
|
||||||
|
explicit OllamaProviderCustom(std::shared_ptr<ProviderStore> store, QUuid id);
|
||||||
|
|
||||||
|
/// Create a new OllamaProvider on disk.
|
||||||
|
explicit OllamaProviderCustom(std::shared_ptr<ProviderStore> store, QString name, QUrl baseUrl);
|
||||||
|
|
||||||
|
Q_SIGNALS:
|
||||||
|
void nameChanged (const QString &value);
|
||||||
|
void baseUrlChanged(const QUrl &value);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
auto asData() -> ModelProviderData override
|
||||||
|
{ return { m_id, ProviderType::ollama, m_name, m_baseUrl, {} }; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaModelDescription : public ModelDescription {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QByteArray modelHash READ modelHash CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit OllamaModelDescription(std::shared_ptr<const OllamaProvider> provider, QByteArray modelHash);
|
||||||
|
|
||||||
|
// getters
|
||||||
|
[[nodiscard]] auto provider () const -> const OllamaProvider * override { return m_provider.get(); }
|
||||||
|
[[nodiscard]] QVariant key () const override { return m_modelHash; }
|
||||||
|
[[nodiscard]] const QByteArray &modelHash() const { return m_modelHash; }
|
||||||
|
|
||||||
|
[[nodiscard]] auto newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<OllamaChatModel>;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
[[nodiscard]] auto newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance * override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const OllamaProvider> m_provider;
|
||||||
|
QByteArray m_modelHash;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OllamaChatModel : public ChatLLMInstance {
|
||||||
|
public:
|
||||||
|
explicit OllamaChatModel(std::shared_ptr<const OllamaModelDescription> description, QNetworkAccessManager *nam);
|
||||||
|
|
||||||
|
auto description() const -> const OllamaModelDescription * override
|
||||||
|
{ return m_description.get(); }
|
||||||
|
|
||||||
|
auto preload() -> QCoro::Task<void> override;
|
||||||
|
|
||||||
|
auto generate(QStringView prompt, const GenerationParams ¶ms, /*out*/ ChatResponseMetadata &metadata)
|
||||||
|
-> QCoro::AsyncGenerator<QString> override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const OllamaModelDescription> m_description;
|
||||||
|
// TODO: implement generate using Ollama backend
|
||||||
|
QNetworkAccessManager *m_nam;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
@ -1,4 +1,4 @@
|
|||||||
#include "openai.h"
|
#include "llmodel_openai.h"
|
||||||
|
|
||||||
#include "mysettings.h"
|
#include "mysettings.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
@ -6,29 +6,34 @@
|
|||||||
#include <QCoro/QCoroAsyncGenerator> // IWYU pragma: keep
|
#include <QCoro/QCoroAsyncGenerator> // IWYU pragma: keep
|
||||||
#include <QCoro/QCoroNetworkReply> // IWYU pragma: keep
|
#include <QCoro/QCoroNetworkReply> // IWYU pragma: keep
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
#include <gpt4all-backend/formatters.h>
|
#include <gpt4all-backend/formatters.h> // IWYU pragma: keep
|
||||||
#include <gpt4all-backend/generation-params.h>
|
|
||||||
#include <gpt4all-backend/rest.h>
|
#include <gpt4all-backend/rest.h>
|
||||||
|
|
||||||
|
#include <QAnyStringView>
|
||||||
#include <QByteArray>
|
#include <QByteArray>
|
||||||
#include <QJsonArray>
|
#include <QJsonArray>
|
||||||
#include <QJsonDocument>
|
#include <QJsonDocument>
|
||||||
#include <QJsonObject>
|
#include <QJsonObject>
|
||||||
#include <QJsonValue>
|
#include <QJsonValue>
|
||||||
#include <QLatin1String>
|
#include <QList>
|
||||||
#include <QNetworkAccessManager>
|
#include <QMap>
|
||||||
|
#include <QMetaEnum>
|
||||||
|
#include <QNetworkReply>
|
||||||
#include <QNetworkRequest>
|
#include <QNetworkRequest>
|
||||||
#include <QRestAccessManager>
|
#include <QRestAccessManager>
|
||||||
#include <QRestReply>
|
#include <QRestReply>
|
||||||
|
#include <QSet>
|
||||||
#include <QStringView>
|
#include <QStringView>
|
||||||
#include <QUrl>
|
|
||||||
#include <QUtf8StringView> // IWYU pragma: keep
|
#include <QUtf8StringView> // IWYU pragma: keep
|
||||||
#include <QVariant>
|
#include <QVariant>
|
||||||
#include <QXmlStreamReader>
|
#include <QXmlStreamReader>
|
||||||
#include <Qt>
|
#include <QtAssert>
|
||||||
|
|
||||||
|
#include <coroutine>
|
||||||
#include <expected>
|
#include <expected>
|
||||||
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <stdexcept>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
using namespace Qt::Literals::StringLiterals;
|
using namespace Qt::Literals::StringLiterals;
|
||||||
@ -63,24 +68,72 @@ static auto processRespLine(const QByteArray &line) -> std::optional<QString>
|
|||||||
namespace gpt4all::ui {
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
void OpenaiModelDescription::setDisplayName(QString value)
|
void OpenaiGenerationParams::parseInner(QMap<GenerationParam, QVariant> &values)
|
||||||
{
|
{
|
||||||
if (m_displayName != value) {
|
tryParseValue(values, GenerationParam::NPredict, &OpenaiGenerationParams::n_predict );
|
||||||
m_displayName = std::move(value);
|
tryParseValue(values, GenerationParam::Temperature, &OpenaiGenerationParams::temperature);
|
||||||
emit displayNameChanged(m_displayName);
|
tryParseValue(values, GenerationParam::TopP, &OpenaiGenerationParams::top_p );
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenaiModelDescription::setModelName(QString value)
|
auto OpenaiGenerationParams::toMap() const -> QMap<QLatin1StringView, QVariant>
|
||||||
{
|
{
|
||||||
if (m_modelName != value) {
|
return {
|
||||||
m_modelName = std::move(value);
|
{ "max_completion_tokens"_L1, n_predict },
|
||||||
emit modelNameChanged(m_modelName);
|
{ "temperature"_L1, temperature },
|
||||||
}
|
{ "top_p"_L1, top_p },
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
OpenaiLLModel::OpenaiLLModel(OpenaiConnectionDetails connDetails, QNetworkAccessManager *nam)
|
auto OpenaiProvider::supportedGenerationParams() const -> QSet<GenerationParam>
|
||||||
: m_connDetails(std::move(connDetails))
|
{
|
||||||
|
using enum GenerationParam;
|
||||||
|
return { NPredict, Temperature, TopP };
|
||||||
|
}
|
||||||
|
|
||||||
|
auto OpenaiProvider::makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const
|
||||||
|
-> OpenaiGenerationParams *
|
||||||
|
{ return new OpenaiGenerationParams(values); }
|
||||||
|
|
||||||
|
OpenaiProviderBuiltin::OpenaiProviderBuiltin(QUuid id, QString name, QUrl baseUrl, QString apiKey)
|
||||||
|
: ModelProvider(std::move(id), std::move(name), std::move(baseUrl))
|
||||||
|
, OpenaiProvider(std::move(apiKey))
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// load
|
||||||
|
OpenaiProviderCustom::OpenaiProviderCustom(std::shared_ptr<ProviderStore> store, QUuid id)
|
||||||
|
: ModelProvider(std::move(id))
|
||||||
|
, ModelProviderCustom(std::move(store))
|
||||||
|
{
|
||||||
|
auto &details = load();
|
||||||
|
m_apiKey = std::get<OpenaiProviderDetails>(details).api_key;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// create
|
||||||
|
OpenaiProviderCustom::OpenaiProviderCustom(std::shared_ptr<ProviderStore> store, QString name, QUrl baseUrl,
|
||||||
|
QString apiKey)
|
||||||
|
: ModelProvider(std::move(name), std::move(baseUrl))
|
||||||
|
, ModelProviderCustom(std::move(store))
|
||||||
|
, OpenaiProvider(std::move(apiKey))
|
||||||
|
{
|
||||||
|
auto data = m_store->create(m_name, m_baseUrl, m_apiKey);
|
||||||
|
if (!data)
|
||||||
|
data.error().raise();
|
||||||
|
m_id = (*data)->id;
|
||||||
|
}
|
||||||
|
|
||||||
|
OpenaiModelDescription::OpenaiModelDescription(std::shared_ptr<const OpenaiProvider> provider, QString modelName)
|
||||||
|
: m_provider(std::move(provider))
|
||||||
|
, m_modelName(std::move(modelName))
|
||||||
|
{}
|
||||||
|
|
||||||
|
auto OpenaiModelDescription::newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<OpenaiChatModel>
|
||||||
|
{ return std::unique_ptr<OpenaiChatModel>(&dynamic_cast<OpenaiChatModel &>(*newInstanceImpl(nam))); }
|
||||||
|
|
||||||
|
auto OpenaiModelDescription::newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance *
|
||||||
|
{ return new OpenaiChatModel({ shared_from_this(), this }, nam); }
|
||||||
|
|
||||||
|
OpenaiChatModel::OpenaiChatModel(std::shared_ptr<const OpenaiModelDescription> description, QNetworkAccessManager *nam)
|
||||||
|
: m_description(std::move(description))
|
||||||
, m_nam(nam)
|
, m_nam(nam)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
@ -159,21 +212,22 @@ static auto parsePrompt(QXmlStreamReader &xml) -> std::expected<QJsonArray, QStr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto OpenaiLLModel::chat(QStringView prompt, const backend::GenerationParams ¶ms,
|
auto preload() -> QCoro::Task<>
|
||||||
/*out*/ ChatResponseMetadata &metadata) -> QCoro::AsyncGenerator<QString>
|
{ co_return; /* not supported -> no-op */ }
|
||||||
|
|
||||||
|
auto OpenaiChatModel::generate(QStringView prompt, const GenerationParams ¶ms,
|
||||||
|
/*out*/ ChatResponseMetadata &metadata) -> QCoro::AsyncGenerator<QString>
|
||||||
{
|
{
|
||||||
auto *mySettings = MySettings::globalInstance();
|
auto *mySettings = MySettings::globalInstance();
|
||||||
|
|
||||||
if (!params.n_predict)
|
if (params.isNoop())
|
||||||
co_return; // nothing requested
|
co_return; // nothing requested
|
||||||
|
|
||||||
auto reqBody = makeJsonObject({
|
auto reqBody = makeJsonObject({
|
||||||
{ "model"_L1, m_connDetails.modelName },
|
{ "model"_L1, m_description->modelName() },
|
||||||
{ "max_completion_tokens"_L1, qint64(params.n_predict) },
|
{ "stream"_L1, true },
|
||||||
{ "stream"_L1, true },
|
|
||||||
{ "temperature"_L1, params.temperature },
|
|
||||||
{ "top_p"_L1, params.top_p },
|
|
||||||
});
|
});
|
||||||
|
extend(reqBody, params.toMap());
|
||||||
|
|
||||||
// conversation history
|
// conversation history
|
||||||
{
|
{
|
||||||
@ -184,9 +238,10 @@ auto OpenaiLLModel::chat(QStringView prompt, const backend::GenerationParams &pa
|
|||||||
reqBody.insert("messages"_L1, *messages);
|
reqBody.insert("messages"_L1, *messages);
|
||||||
}
|
}
|
||||||
|
|
||||||
QNetworkRequest request(m_connDetails.baseUrl.resolved(QUrl("/v1/chat/completions")));
|
auto &provider = *m_description->provider();
|
||||||
|
QNetworkRequest request(provider.baseUrl().resolved(QUrl("/v1/chat/completions")));
|
||||||
request.setHeader(QNetworkRequest::UserAgentHeader, mySettings->userAgent());
|
request.setHeader(QNetworkRequest::UserAgentHeader, mySettings->userAgent());
|
||||||
request.setRawHeader("authorization", u"Bearer %1"_s.arg(m_connDetails.apiKey).toUtf8());
|
request.setRawHeader("authorization", u"Bearer %1"_s.arg(provider.apiKey()).toUtf8());
|
||||||
|
|
||||||
QRestAccessManager restNam(m_nam);
|
QRestAccessManager restNam(m_nam);
|
||||||
std::unique_ptr<QNetworkReply> reply(restNam.post(request, QJsonDocument(reqBody)));
|
std::unique_ptr<QNetworkReply> reply(restNam.post(request, QJsonDocument(reqBody)));
|
139
gpt4all-chat/src/llmodel_openai.h
Normal file
139
gpt4all-chat/src/llmodel_openai.h
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "llmodel_chat.h"
|
||||||
|
#include "llmodel_description.h"
|
||||||
|
#include "llmodel_provider.h"
|
||||||
|
|
||||||
|
#include <QLatin1StringView> // IWYU pragma: keep
|
||||||
|
#include <QObject> // IWYU pragma: keep
|
||||||
|
#include <QString>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QVariant>
|
||||||
|
#include <QtTypes> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
class QNetworkAccessManager;
|
||||||
|
template <typename Key, typename T> class QMap;
|
||||||
|
template <typename T> class QSet;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class OpenaiChatModel;
|
||||||
|
|
||||||
|
struct OpenaiGenerationParamsData {
|
||||||
|
uint n_predict;
|
||||||
|
float temperature;
|
||||||
|
float top_p;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiGenerationParams : public GenerationParams, public OpenaiGenerationParamsData {
|
||||||
|
public:
|
||||||
|
explicit OpenaiGenerationParams(QMap<GenerationParam, QVariant> values) { parse(std::move(values)); }
|
||||||
|
auto toMap() const -> QMap<QLatin1StringView, QVariant> override;
|
||||||
|
bool isNoop() const override { return !n_predict; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void parseInner(QMap<GenerationParam, QVariant> &values) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiProvider : public QObject, public virtual ModelProvider {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit OpenaiProvider() = default; // custom
|
||||||
|
explicit OpenaiProvider(QString apiKey) // built-in
|
||||||
|
: m_apiKey(std::move(apiKey))
|
||||||
|
{}
|
||||||
|
public:
|
||||||
|
~OpenaiProvider() noexcept override = 0;
|
||||||
|
|
||||||
|
QObject *asQObject() override { return this; }
|
||||||
|
const QObject *asQObject() const override { return this; }
|
||||||
|
|
||||||
|
[[nodiscard]] const QString &apiKey() const { return m_apiKey; }
|
||||||
|
|
||||||
|
auto supportedGenerationParams() const -> QSet<GenerationParam> override;
|
||||||
|
auto makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const -> OpenaiGenerationParams * override;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
QString m_apiKey;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiProviderBuiltin : public ModelProviderBuiltin, public OpenaiProvider {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QString apiKey READ apiKey CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Create a new built-in OpenAI provider (transient).
|
||||||
|
explicit OpenaiProviderBuiltin(QUuid id, QString name, QUrl baseUrl, QString apiKey);
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiProviderCustom final : public OpenaiProvider, public ModelProviderCustom {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
Q_PROPERTY(QString apiKey READ apiKey WRITE setApiKey NOTIFY apiKeyChanged)
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Load an existing OpenaiProvider from disk.
|
||||||
|
explicit OpenaiProviderCustom(std::shared_ptr<ProviderStore> store, QUuid id);
|
||||||
|
|
||||||
|
/// Create a new OpenaiProvider on disk.
|
||||||
|
explicit OpenaiProviderCustom(std::shared_ptr<ProviderStore> store, QString name, QUrl baseUrl, QString apiKey);
|
||||||
|
|
||||||
|
void setApiKey(QString value) { setMemberProp<QString>(&OpenaiProviderCustom::m_apiKey, "apiKey", std::move(value)); }
|
||||||
|
|
||||||
|
Q_SIGNALS:
|
||||||
|
void nameChanged (const QString &value);
|
||||||
|
void baseUrlChanged(const QUrl &value);
|
||||||
|
void apiKeyChanged (const QString &value);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
auto asData() -> ModelProviderData override
|
||||||
|
{ return { m_id, ProviderType::openai, m_name, m_baseUrl, OpenaiProviderDetails { m_apiKey } }; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiModelDescription : public ModelDescription {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QString modelName READ modelName CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit OpenaiModelDescription(std::shared_ptr<const OpenaiProvider> provider, QString modelName);
|
||||||
|
|
||||||
|
// getters
|
||||||
|
[[nodiscard]] auto provider () const -> const OpenaiProvider * override { return m_provider.get(); }
|
||||||
|
[[nodiscard]] QVariant key () const override { return m_modelName; }
|
||||||
|
[[nodiscard]] const QString &modelName() const { return m_modelName; }
|
||||||
|
|
||||||
|
[[nodiscard]] auto newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<OpenaiChatModel>;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
[[nodiscard]] auto newInstanceImpl(QNetworkAccessManager *nam) const -> ChatLLMInstance * override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const OpenaiProvider> m_provider;
|
||||||
|
QString m_modelName;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenaiChatModel : public ChatLLMInstance {
|
||||||
|
public:
|
||||||
|
explicit OpenaiChatModel(std::shared_ptr<const OpenaiModelDescription> description, QNetworkAccessManager *nam);
|
||||||
|
|
||||||
|
auto description() const -> const OpenaiModelDescription * override
|
||||||
|
{ return m_description.get(); }
|
||||||
|
|
||||||
|
auto preload() -> QCoro::Task<void> override;
|
||||||
|
|
||||||
|
auto generate(QStringView prompt, const GenerationParams ¶ms, /*out*/ ChatResponseMetadata &metadata)
|
||||||
|
-> QCoro::AsyncGenerator<QString> override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const OpenaiModelDescription> m_description;
|
||||||
|
QNetworkAccessManager *m_nam;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
139
gpt4all-chat/src/llmodel_provider.cpp
Normal file
139
gpt4all-chat/src/llmodel_provider.cpp
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
#include "llmodel_provider.h"
|
||||||
|
|
||||||
|
#include "mysettings.h"
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
|
#include <gpt4all-backend/formatters.h> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <QModelIndex> // IWYU pragma: keep
|
||||||
|
#include <QVariant>
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
void GenerationParams::parse(QMap<GenerationParam, QVariant> values)
|
||||||
|
{
|
||||||
|
parseInner(values);
|
||||||
|
if (!values.isEmpty()) {
|
||||||
|
auto gparamsMeta = QMetaEnum::fromType<GenerationParam>();
|
||||||
|
throw std::invalid_argument(fmt::format(
|
||||||
|
" unsupported param: {}", gparamsMeta.valueToKey(int(values.keys().constFirst()))
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
QVariant GenerationParams::tryParseValue(QMap<GenerationParam, QVariant> &values, GenerationParam key,
|
||||||
|
const QMetaType &type)
|
||||||
|
{
|
||||||
|
auto value = values.take(key);
|
||||||
|
if (value.isValid() && !value.canConvert(type)) {
|
||||||
|
auto gparamsMeta = QMetaEnum::fromType<GenerationParam>();
|
||||||
|
throw std::invalid_argument(fmt::format(
|
||||||
|
"expected {} of type {}, got {}", gparamsMeta.valueToKey(int(key)), type.name(), value.typeName()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
ModelProviderCustom::~ModelProviderCustom() noexcept
|
||||||
|
{
|
||||||
|
if (auto res = m_store->release(m_id); !res)
|
||||||
|
res.error().raise(); // should not happen - will terminate program
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ModelProviderCustom::load() -> const ModelProviderData::Details &
|
||||||
|
{
|
||||||
|
auto data = m_store->acquire(m_id);
|
||||||
|
if (!data)
|
||||||
|
data.error().raise();
|
||||||
|
m_name = (*data)->name;
|
||||||
|
m_baseUrl = (*data)->base_url;
|
||||||
|
return (*data)->details;
|
||||||
|
}
|
||||||
|
|
||||||
|
ProviderRegistry::ProviderRegistry(fs::path path)
|
||||||
|
: m_store(std::move(path))
|
||||||
|
{
|
||||||
|
auto *mysettings = MySettings::globalInstance();
|
||||||
|
connect(mysettings, &MySettings::modelPathChanged, this, &ProviderRegistry::onModelPathChanged);
|
||||||
|
}
|
||||||
|
|
||||||
|
Q_INVOKABLE void ProviderRegistry::registerBuiltinProvider(ModelProviderBuiltin *provider)
|
||||||
|
{
|
||||||
|
auto [_, unique] = m_providers.emplace(provider->id(), provider->asQObject());
|
||||||
|
if (!unique)
|
||||||
|
qWarning() << "ignoring duplicate provider:" << provider->id();
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]]
|
||||||
|
bool ProviderRegistry::registerCustomProvider(std::unique_ptr<ModelProviderCustom> provider)
|
||||||
|
{
|
||||||
|
auto [_, unique] = m_providers.emplace(provider->id(), provider->asQObject());
|
||||||
|
if (unique) {
|
||||||
|
m_customProviders.push_back(std::move(provider));
|
||||||
|
emit customProviderAdded(m_customProviders.size() - 1);
|
||||||
|
}
|
||||||
|
return unique;
|
||||||
|
}
|
||||||
|
|
||||||
|
fs::path ProviderRegistry::getSubdir()
|
||||||
|
{
|
||||||
|
auto *mysettings = MySettings::globalInstance();
|
||||||
|
return toFSPath(mysettings->modelPath()) / "providers";
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProviderRegistry::onModelPathChanged()
|
||||||
|
{
|
||||||
|
auto path = getSubdir();
|
||||||
|
if (path != m_store.path()) {
|
||||||
|
emit aboutToBeCleared();
|
||||||
|
m_customProviders.clear(); // delete custom providers to release store locks
|
||||||
|
if (auto res = m_store.setPath(path); !res)
|
||||||
|
res.error().raise(); // should not happen
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CustomProviderList::CustomProviderList(QPointer<ProviderRegistry> registry)
|
||||||
|
: m_registry(std::move(registry))
|
||||||
|
, m_size(m_registry->customProviderCount())
|
||||||
|
{
|
||||||
|
connect(m_registry, &ProviderRegistry::customProviderAdded, this, &CustomProviderList::onCustomProviderAdded);
|
||||||
|
connect(m_registry, &ProviderRegistry::aboutToBeCleared, this, &CustomProviderList::onAboutToBeCleared,
|
||||||
|
Qt::DirectConnection);
|
||||||
|
}
|
||||||
|
|
||||||
|
QVariant CustomProviderList::data(const QModelIndex &index, int role) const
|
||||||
|
{
|
||||||
|
if (index.isValid() && index.row() < rowCount() && role == Qt::DisplayRole)
|
||||||
|
return QVariant::fromValue(m_registry->customProviderAt(index.row()));
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
void CustomProviderList::onCustomProviderAdded(size_t index)
|
||||||
|
{
|
||||||
|
beginInsertRows({}, m_size, m_size);
|
||||||
|
m_size++;
|
||||||
|
endInsertRows();
|
||||||
|
}
|
||||||
|
|
||||||
|
void CustomProviderList::onAboutToBeCleared()
|
||||||
|
{
|
||||||
|
beginResetModel();
|
||||||
|
m_size = 0;
|
||||||
|
endResetModel();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CustomProviderListSort::lessThan(const QModelIndex &left, const QModelIndex &right) const
|
||||||
|
{
|
||||||
|
auto *leftData = sourceModel()->data(left ).value<ModelProviderCustom *>();
|
||||||
|
auto *rightData = sourceModel()->data(right).value<ModelProviderCustom *>();
|
||||||
|
if (leftData && rightData)
|
||||||
|
return QString::localeAwareCompare(leftData->name(), rightData->name()) < 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
199
gpt4all-chat/src/llmodel_provider.h
Normal file
199
gpt4all-chat/src/llmodel_provider.h
Normal file
@ -0,0 +1,199 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "store_provider.h"
|
||||||
|
|
||||||
|
#include "utils.h" // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <QAbstractListModel>
|
||||||
|
#include <QObject>
|
||||||
|
#include <QPointer>
|
||||||
|
#include <QQmlEngine> // IWYU pragma: keep
|
||||||
|
#include <QSortFilterProxyModel>
|
||||||
|
#include <QString>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QUuid>
|
||||||
|
#include <QtPreprocessorSupport>
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <memory>
|
||||||
|
#include <string_view>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
class QJSEngine;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
Q_NAMESPACE
|
||||||
|
|
||||||
|
enum class GenerationParam {
|
||||||
|
NPredict,
|
||||||
|
Temperature,
|
||||||
|
TopP,
|
||||||
|
TopK,
|
||||||
|
MinP,
|
||||||
|
RepeatPenalty,
|
||||||
|
RepeatLastN,
|
||||||
|
};
|
||||||
|
Q_ENUM_NS(GenerationParam)
|
||||||
|
|
||||||
|
class GenerationParams {
|
||||||
|
public:
|
||||||
|
virtual ~GenerationParams() noexcept = 0;
|
||||||
|
|
||||||
|
virtual QMap<QLatin1StringView, QVariant> toMap() const = 0;
|
||||||
|
virtual bool isNoop() const = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void parse(QMap<GenerationParam, QVariant> values);
|
||||||
|
virtual void parseInner(QMap<GenerationParam, QVariant> &values) = 0;
|
||||||
|
|
||||||
|
static QVariant tryParseValue(QMap<GenerationParam, QVariant> &values, GenerationParam key, const QMetaType &type);
|
||||||
|
|
||||||
|
template <typename T, typename S, typename C>
|
||||||
|
void tryParseValue(this S &self, QMap<GenerationParam, QVariant> &values, GenerationParam key, T C::* dest);
|
||||||
|
};
|
||||||
|
|
||||||
|
class ModelProvider {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QUuid id READ id CONSTANT)
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit ModelProvider(QUuid id) // load
|
||||||
|
: m_id(std::move(id)) {}
|
||||||
|
explicit ModelProvider(QUuid id, QString name, QUrl baseUrl) // create built-in
|
||||||
|
: m_id(std::move(id)), m_name(std::move(name)), m_baseUrl(std::move(baseUrl)) {}
|
||||||
|
explicit ModelProvider(QString name, QUrl baseUrl) // create custom
|
||||||
|
: m_name(std::move(name)), m_baseUrl(std::move(baseUrl)) {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual ~ModelProvider() noexcept = 0;
|
||||||
|
|
||||||
|
virtual QObject *asQObject() = 0;
|
||||||
|
virtual const QObject *asQObject() const = 0;
|
||||||
|
|
||||||
|
// getters
|
||||||
|
[[nodiscard]] const QUuid &id () const { return m_id; }
|
||||||
|
[[nodiscard]] const QString &name () const { return m_name; }
|
||||||
|
[[nodiscard]] const QUrl &baseUrl() const { return m_baseUrl; }
|
||||||
|
|
||||||
|
virtual auto supportedGenerationParams() const -> QSet<GenerationParam> = 0;
|
||||||
|
virtual auto makeGenerationParams(const QMap<GenerationParam, QVariant> &values) const -> GenerationParams * = 0;
|
||||||
|
|
||||||
|
friend bool operator==(const ModelProvider &a, const ModelProvider &b)
|
||||||
|
{ return a.m_id == b.m_id; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
QUuid m_id;
|
||||||
|
QString m_name;
|
||||||
|
QUrl m_baseUrl;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ModelProviderBuiltin : public virtual ModelProvider {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QString name READ name CONSTANT)
|
||||||
|
Q_PROPERTY(QUrl baseUrl READ baseUrl CONSTANT)
|
||||||
|
|
||||||
|
public:
|
||||||
|
~ModelProviderBuiltin() noexcept override = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ModelProviderCustom : public virtual ModelProvider {
|
||||||
|
Q_GADGET
|
||||||
|
Q_PROPERTY(QString name READ name WRITE setName NOTIFY nameChanged )
|
||||||
|
Q_PROPERTY(QUrl baseUrl READ baseUrl WRITE setBaseUrl NOTIFY baseUrlChanged)
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit ModelProviderCustom(std::shared_ptr<ProviderStore> store)
|
||||||
|
: m_store(std::move(store)) {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
~ModelProviderCustom() noexcept override;
|
||||||
|
|
||||||
|
// setters
|
||||||
|
void setName (QString value) { setMemberProp<QString>(&ModelProviderCustom::m_name, "name", std::move(value)); }
|
||||||
|
void setBaseUrl(QUrl value) { setMemberProp<QUrl >(&ModelProviderCustom::m_baseUrl, "baseUrl", std::move(value)); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual auto load() -> const ModelProviderData::Details &;
|
||||||
|
virtual auto asData() -> ModelProviderData = 0;
|
||||||
|
|
||||||
|
template <typename T, typename S, typename C>
|
||||||
|
void setMemberProp(this S &self, T C::* member, std::string_view name, T value);
|
||||||
|
|
||||||
|
std::shared_ptr<ProviderStore> m_store;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ProviderRegistry : public QObject {
|
||||||
|
Q_OBJECT
|
||||||
|
QML_ELEMENT
|
||||||
|
QML_SINGLETON
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit ProviderRegistry(std::filesystem::path path);
|
||||||
|
|
||||||
|
public:
|
||||||
|
static ProviderRegistry *create(QQmlEngine *, QJSEngine *) { return new ProviderRegistry(getSubdir()); }
|
||||||
|
Q_INVOKABLE void registerBuiltinProvider(ModelProviderBuiltin *provider);
|
||||||
|
[[nodiscard]] bool registerCustomProvider (std::unique_ptr<ModelProviderCustom> provider);
|
||||||
|
|
||||||
|
size_t customProviderCount() const
|
||||||
|
{ return m_customProviders.size(); }
|
||||||
|
auto customProviderAt(size_t i) const -> const ModelProviderCustom *
|
||||||
|
{ return m_customProviders.at(i).get(); }
|
||||||
|
auto operator[](const QUuid &id) -> ModelProviderCustom *
|
||||||
|
{ return &dynamic_cast<ModelProviderCustom &>(*m_providers.at(id)); }
|
||||||
|
|
||||||
|
Q_SIGNALS:
|
||||||
|
void customProviderAdded(size_t index);
|
||||||
|
void aboutToBeCleared();
|
||||||
|
|
||||||
|
private:
|
||||||
|
static auto getSubdir() -> std::filesystem::path;
|
||||||
|
|
||||||
|
private Q_SLOTS:
|
||||||
|
void onModelPathChanged();
|
||||||
|
|
||||||
|
private:
|
||||||
|
ProviderStore m_store;
|
||||||
|
std::unordered_map<QUuid, QPointer<QObject>> m_providers;
|
||||||
|
std::vector<std::unique_ptr<ModelProviderCustom>> m_customProviders;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CustomProviderList : public QAbstractListModel {
|
||||||
|
Q_OBJECT
|
||||||
|
QML_ELEMENT
|
||||||
|
|
||||||
|
protected:
|
||||||
|
explicit CustomProviderList(QPointer<ProviderRegistry> registry);
|
||||||
|
|
||||||
|
public:
|
||||||
|
int rowCount(const QModelIndex &parent = {}) const override
|
||||||
|
{ Q_UNUSED(parent) return int(m_size); }
|
||||||
|
QVariant data(const QModelIndex &index, int role) const override;
|
||||||
|
|
||||||
|
private Q_SLOTS:
|
||||||
|
void onCustomProviderAdded(size_t index);
|
||||||
|
void onAboutToBeCleared();
|
||||||
|
|
||||||
|
private:
|
||||||
|
QPointer<ProviderRegistry> m_registry;
|
||||||
|
size_t m_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CustomProviderListSort : public QSortFilterProxyModel {
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
protected:
|
||||||
|
bool lessThan(const QModelIndex &left, const QModelIndex &right) const override;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
||||||
|
|
||||||
|
|
||||||
|
#include "llmodel_provider.inl" // IWYU pragma: export
|
30
gpt4all-chat/src/llmodel_provider.inl
Normal file
30
gpt4all-chat/src/llmodel_provider.inl
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T, typename S, typename C>
|
||||||
|
void GenerationParams::tryParseValue(this S &self, QMap<GenerationParam, QVariant> &values, GenerationParam key,
|
||||||
|
T C::* dest)
|
||||||
|
{
|
||||||
|
if (auto value = tryParseValue(values, key, QMetaType::fromType<T>()); value.isValid())
|
||||||
|
self.*dest = value.template value<T>();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename S, typename C>
|
||||||
|
void ModelProviderCustom::setMemberProp(this S &self, T C::* member, std::string_view name, T value)
|
||||||
|
{
|
||||||
|
auto &mpc = static_cast<ModelProviderCustom &>(self);
|
||||||
|
auto &cur = self.*member;
|
||||||
|
if (cur != value) {
|
||||||
|
cur = std::move(value);
|
||||||
|
auto data = mpc.asData();
|
||||||
|
if (auto res = mpc.m_store->setData(std::move(data)); !res)
|
||||||
|
res.error().raise();
|
||||||
|
QMetaObject::invokeMethod(self.asQObject(), fmt::format("{}Changed", name).c_str(), cur);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
@ -16,6 +16,7 @@
|
|||||||
#include <QSortFilterProxyModel>
|
#include <QSortFilterProxyModel>
|
||||||
#include <QSslError>
|
#include <QSslError>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
|
#include <QUuid>
|
||||||
#include <QVariant>
|
#include <QVariant>
|
||||||
#include <QVector> // IWYU pragma: keep
|
#include <QVector> // IWYU pragma: keep
|
||||||
#include <Qt>
|
#include <Qt>
|
||||||
@ -27,6 +28,7 @@
|
|||||||
// IWYU pragma: no_forward_declare QObject
|
// IWYU pragma: no_forward_declare QObject
|
||||||
// IWYU pragma: no_forward_declare QSslError
|
// IWYU pragma: no_forward_declare QSslError
|
||||||
class QUrl;
|
class QUrl;
|
||||||
|
namespace gpt4all::ui { class ModelDescription; }
|
||||||
|
|
||||||
using namespace Qt::Literals::StringLiterals;
|
using namespace Qt::Literals::StringLiterals;
|
||||||
|
|
||||||
@ -75,6 +77,7 @@ private:
|
|||||||
struct ModelInfo {
|
struct ModelInfo {
|
||||||
Q_GADGET
|
Q_GADGET
|
||||||
Q_PROPERTY(QString id READ id WRITE setId)
|
Q_PROPERTY(QString id READ id WRITE setId)
|
||||||
|
Q_PROPERTY(const ModelDescription *modelDesc READ modelDescQt WRITE setModelDescQt)
|
||||||
Q_PROPERTY(QString name READ name WRITE setName)
|
Q_PROPERTY(QString name READ name WRITE setName)
|
||||||
Q_PROPERTY(QString filename READ filename WRITE setFilename)
|
Q_PROPERTY(QString filename READ filename WRITE setFilename)
|
||||||
Q_PROPERTY(QString dirpath MEMBER dirpath)
|
Q_PROPERTY(QString dirpath MEMBER dirpath)
|
||||||
@ -137,6 +140,13 @@ public:
|
|||||||
QString id() const;
|
QString id() const;
|
||||||
void setId(const QString &id);
|
void setId(const QString &id);
|
||||||
|
|
||||||
|
auto modelDesc() const -> const std::shared_ptr<const gpt4all::ui::ModelDescription> &;
|
||||||
|
auto modelDescQt() const -> const gpt4all::ui::ModelDescription *
|
||||||
|
{ return modelDesc().get(); }
|
||||||
|
|
||||||
|
void setModelDesc(std::shared_ptr<const gpt4all::ui::ModelDescription> value);
|
||||||
|
void setModelDescQt(const gpt4all::ui::ModelDescription *); // TODO: implement
|
||||||
|
|
||||||
QString name() const;
|
QString name() const;
|
||||||
void setName(const QString &name);
|
void setName(const QString &name);
|
||||||
|
|
||||||
@ -247,6 +257,7 @@ private:
|
|||||||
QVariant getField(QLatin1StringView name) const;
|
QVariant getField(QLatin1StringView name) const;
|
||||||
|
|
||||||
QString m_id;
|
QString m_id;
|
||||||
|
std::shared_ptr<const gpt4all::ui::ModelDescription> m_modelDesc;
|
||||||
QString m_name;
|
QString m_name;
|
||||||
QString m_filename;
|
QString m_filename;
|
||||||
QString m_description;
|
QString m_description;
|
||||||
|
@ -48,7 +48,6 @@ namespace ModelSettingsKey { namespace {
|
|||||||
|
|
||||||
namespace defaults {
|
namespace defaults {
|
||||||
|
|
||||||
static const int threadCount = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
|
||||||
static const bool networkIsActive = false;
|
static const bool networkIsActive = false;
|
||||||
static const bool networkUsageStatsActive = false;
|
static const bool networkUsageStatsActive = false;
|
||||||
static const QString device = "Auto";
|
static const QString device = "Auto";
|
||||||
@ -254,7 +253,6 @@ void MySettings::restoreApplicationDefaults()
|
|||||||
setChatTheme(basicDefaults.value("chatTheme").value<ChatTheme>());
|
setChatTheme(basicDefaults.value("chatTheme").value<ChatTheme>());
|
||||||
setFontSize(basicDefaults.value("fontSize").value<FontSize>());
|
setFontSize(basicDefaults.value("fontSize").value<FontSize>());
|
||||||
setDevice(defaults::device);
|
setDevice(defaults::device);
|
||||||
setThreadCount(defaults::threadCount);
|
|
||||||
setSystemTray(basicDefaults.value("systemTray").toBool());
|
setSystemTray(basicDefaults.value("systemTray").toBool());
|
||||||
setServerChat(basicDefaults.value("serverChat").toBool());
|
setServerChat(basicDefaults.value("serverChat").toBool());
|
||||||
setNetworkPort(basicDefaults.value("networkPort").toInt());
|
setNetworkPort(basicDefaults.value("networkPort").toInt());
|
||||||
@ -596,29 +594,6 @@ void MySettings::setModelSuggestedFollowUpPrompt(const ModelInfo &info, const QS
|
|||||||
setModelSetting("suggestedFollowUpPrompt", info, value, force, true);
|
setModelSetting("suggestedFollowUpPrompt", info, value, force, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
int MySettings::threadCount() const
|
|
||||||
{
|
|
||||||
int c = m_settings.value("threadCount", defaults::threadCount).toInt();
|
|
||||||
// The old thread setting likely left many people with 0 in settings config file, which means
|
|
||||||
// we should reset it to the default going forward
|
|
||||||
if (c <= 0)
|
|
||||||
c = defaults::threadCount;
|
|
||||||
c = std::max(c, 1);
|
|
||||||
c = std::min(c, QThread::idealThreadCount());
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MySettings::setThreadCount(int value)
|
|
||||||
{
|
|
||||||
if (threadCount() == value)
|
|
||||||
return;
|
|
||||||
|
|
||||||
value = std::max(value, 1);
|
|
||||||
value = std::min(value, QThread::idealThreadCount());
|
|
||||||
m_settings.setValue("threadCount", value);
|
|
||||||
emit threadCountChanged();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool MySettings::systemTray() const { return getBasicSetting("systemTray" ).toBool(); }
|
bool MySettings::systemTray() const { return getBasicSetting("systemTray" ).toBool(); }
|
||||||
bool MySettings::serverChat() const { return getBasicSetting("serverChat" ).toBool(); }
|
bool MySettings::serverChat() const { return getBasicSetting("serverChat" ).toBool(); }
|
||||||
int MySettings::networkPort() const { return getBasicSetting("networkPort" ).toInt(); }
|
int MySettings::networkPort() const { return getBasicSetting("networkPort" ).toInt(); }
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
// IWYU pragma: no_forward_declare QModelIndex
|
// IWYU pragma: no_forward_declare QModelIndex
|
||||||
class QLocale;
|
class QLocale;
|
||||||
|
namespace gpt4all::ui { class GenerationParams; }
|
||||||
|
|
||||||
|
|
||||||
namespace MySettingsEnums {
|
namespace MySettingsEnums {
|
||||||
@ -54,7 +55,6 @@ using namespace MySettingsEnums;
|
|||||||
class MySettings : public QObject
|
class MySettings : public QObject
|
||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
Q_PROPERTY(int threadCount READ threadCount WRITE setThreadCount NOTIFY threadCountChanged)
|
|
||||||
Q_PROPERTY(bool systemTray READ systemTray WRITE setSystemTray NOTIFY systemTrayChanged)
|
Q_PROPERTY(bool systemTray READ systemTray WRITE setSystemTray NOTIFY systemTrayChanged)
|
||||||
Q_PROPERTY(bool serverChat READ serverChat WRITE setServerChat NOTIFY serverChatChanged)
|
Q_PROPERTY(bool serverChat READ serverChat WRITE setServerChat NOTIFY serverChatChanged)
|
||||||
Q_PROPERTY(QString modelPath READ modelPath WRITE setModelPath NOTIFY modelPathChanged)
|
Q_PROPERTY(QString modelPath READ modelPath WRITE setModelPath NOTIFY modelPathChanged)
|
||||||
@ -156,9 +156,10 @@ public:
|
|||||||
QString modelSuggestedFollowUpPrompt(const ModelInfo &info) const;
|
QString modelSuggestedFollowUpPrompt(const ModelInfo &info) const;
|
||||||
Q_INVOKABLE void setModelSuggestedFollowUpPrompt(const ModelInfo &info, const QString &value, bool force = false);
|
Q_INVOKABLE void setModelSuggestedFollowUpPrompt(const ModelInfo &info, const QString &value, bool force = false);
|
||||||
|
|
||||||
|
// TODO: implement
|
||||||
|
auto modelGenParams(const ModelInfo &info) -> gpt4all::ui::GenerationParams;
|
||||||
|
|
||||||
// Application settings
|
// Application settings
|
||||||
int threadCount() const;
|
|
||||||
void setThreadCount(int value);
|
|
||||||
bool systemTray() const;
|
bool systemTray() const;
|
||||||
void setSystemTray(bool value);
|
void setSystemTray(bool value);
|
||||||
bool serverChat() const;
|
bool serverChat() const;
|
||||||
@ -173,10 +174,6 @@ public:
|
|||||||
void setFontSize(FontSize value);
|
void setFontSize(FontSize value);
|
||||||
QString device();
|
QString device();
|
||||||
void setDevice(const QString &value);
|
void setDevice(const QString &value);
|
||||||
int32_t contextLength() const;
|
|
||||||
void setContextLength(int32_t value);
|
|
||||||
int32_t gpuLayers() const;
|
|
||||||
void setGpuLayers(int32_t value);
|
|
||||||
SuggestionMode suggestionMode() const;
|
SuggestionMode suggestionMode() const;
|
||||||
void setSuggestionMode(SuggestionMode value);
|
void setSuggestionMode(SuggestionMode value);
|
||||||
|
|
||||||
@ -231,7 +228,6 @@ Q_SIGNALS:
|
|||||||
void systemMessageChanged(const ModelInfo &info, bool fromInfo = false);
|
void systemMessageChanged(const ModelInfo &info, bool fromInfo = false);
|
||||||
void chatNamePromptChanged(const ModelInfo &info);
|
void chatNamePromptChanged(const ModelInfo &info);
|
||||||
void suggestedFollowUpPromptChanged(const ModelInfo &info);
|
void suggestedFollowUpPromptChanged(const ModelInfo &info);
|
||||||
void threadCountChanged();
|
|
||||||
void systemTrayChanged();
|
void systemTrayChanged();
|
||||||
void serverChatChanged();
|
void serverChatChanged();
|
||||||
void modelPathChanged();
|
void modelPathChanged();
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include "chat.h"
|
#include "chat.h"
|
||||||
#include "chatmodel.h"
|
#include "chatmodel.h"
|
||||||
|
#include "llmodel_description.h"
|
||||||
#include "modellist.h"
|
#include "modellist.h"
|
||||||
#include "mysettings.h"
|
#include "mysettings.h"
|
||||||
|
|
||||||
@ -50,8 +51,10 @@
|
|||||||
#include <variant>
|
#include <variant>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
using namespace std::string_literals;
|
|
||||||
using namespace Qt::Literals::StringLiterals;
|
using namespace Qt::Literals::StringLiterals;
|
||||||
|
using namespace std::string_literals;
|
||||||
|
using namespace gpt4all;
|
||||||
|
using namespace gpt4all::ui;
|
||||||
|
|
||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
|
|
||||||
@ -127,11 +130,11 @@ class BaseCompletionRequest {
|
|||||||
public:
|
public:
|
||||||
QString model; // required
|
QString model; // required
|
||||||
// NB: some parameters are not supported yet
|
// NB: some parameters are not supported yet
|
||||||
uint max_tokens = 16;
|
|
||||||
qint64 n = 1;
|
qint64 n = 1;
|
||||||
float temperature = 1.f;
|
std::optional<uint> max_tokens {};
|
||||||
float top_p = 1.f;
|
std::optional<float> temperature {};
|
||||||
float min_p = 0.f;
|
std::optional<float> top_p {};
|
||||||
|
std::optional<float> min_p {};
|
||||||
|
|
||||||
BaseCompletionRequest() = default;
|
BaseCompletionRequest() = default;
|
||||||
virtual ~BaseCompletionRequest() = default;
|
virtual ~BaseCompletionRequest() = default;
|
||||||
@ -162,7 +165,7 @@ protected:
|
|||||||
|
|
||||||
value = reqValue("max_tokens", Integer, false, /*min*/ 1);
|
value = reqValue("max_tokens", Integer, false, /*min*/ 1);
|
||||||
if (!value.isNull())
|
if (!value.isNull())
|
||||||
this->max_tokens = uint(qMin(value.toInteger(), UINT32_MAX));
|
this->max_tokens = uint(qMin(value.toInteger(), qint64(UINT32_MAX)));
|
||||||
|
|
||||||
value = reqValue("n", Integer, false, /*min*/ 1);
|
value = reqValue("n", Integer, false, /*min*/ 1);
|
||||||
if (!value.isNull())
|
if (!value.isNull())
|
||||||
@ -629,8 +632,6 @@ auto Server::handleCompletionRequest(const CompletionRequest &request)
|
|||||||
{
|
{
|
||||||
Q_ASSERT(m_chatModel);
|
Q_ASSERT(m_chatModel);
|
||||||
|
|
||||||
auto *mySettings = MySettings::globalInstance();
|
|
||||||
|
|
||||||
ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
|
ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
|
||||||
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
||||||
for (const ModelInfo &info : modelList) {
|
for (const ModelInfo &info : modelList) {
|
||||||
@ -662,22 +663,25 @@ auto Server::handleCompletionRequest(const CompletionRequest &request)
|
|||||||
return makeError(QHttpServerResponder::StatusCode::InternalServerError);
|
return makeError(QHttpServerResponder::StatusCode::InternalServerError);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<GenerationParams> genParams;
|
||||||
|
{
|
||||||
|
using enum GenerationParam;
|
||||||
|
QMap<GenerationParam, QVariant> values;
|
||||||
|
if (auto v = request.max_tokens ) values.insert(NPredict, *v);
|
||||||
|
if (auto v = request.temperature) values.insert(Temperature, *v);
|
||||||
|
if (auto v = request.top_p ) values.insert(TopP, *v);
|
||||||
|
if (auto v = request.min_p ) values.insert(MinP, *v);
|
||||||
|
try {
|
||||||
|
genParams.reset(modelDescription()->makeGenerationParams(values));
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
throw InvalidRequestError(e.what());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// add prompt/response items to GUI
|
// add prompt/response items to GUI
|
||||||
m_chatModel->appendPrompt(request.prompt);
|
m_chatModel->appendPrompt(request.prompt);
|
||||||
m_chatModel->appendResponse();
|
m_chatModel->appendResponse();
|
||||||
|
|
||||||
// FIXME(jared): taking parameters from the UI inhibits reproducibility of results
|
|
||||||
backend::GenerationParams genParams {
|
|
||||||
.n_predict = request.max_tokens,
|
|
||||||
.top_k = mySettings->modelTopK(modelInfo),
|
|
||||||
.top_p = request.top_p,
|
|
||||||
.min_p = request.min_p,
|
|
||||||
.temp = request.temperature,
|
|
||||||
.n_batch = mySettings->modelPromptBatchSize(modelInfo),
|
|
||||||
.repeat_penalty = float(mySettings->modelRepeatPenalty(modelInfo)),
|
|
||||||
.repeat_last_n = mySettings->modelRepeatPenaltyTokens(modelInfo),
|
|
||||||
};
|
|
||||||
|
|
||||||
auto promptUtf8 = request.prompt.toUtf8();
|
auto promptUtf8 = request.prompt.toUtf8();
|
||||||
int promptTokens = 0;
|
int promptTokens = 0;
|
||||||
int responseTokens = 0;
|
int responseTokens = 0;
|
||||||
@ -686,7 +690,7 @@ auto Server::handleCompletionRequest(const CompletionRequest &request)
|
|||||||
PromptResult result;
|
PromptResult result;
|
||||||
try {
|
try {
|
||||||
result = promptInternal(std::string_view(promptUtf8.cbegin(), promptUtf8.cend()),
|
result = promptInternal(std::string_view(promptUtf8.cbegin(), promptUtf8.cend()),
|
||||||
genParams,
|
*genParams,
|
||||||
/*usedLocalDocs*/ false);
|
/*usedLocalDocs*/ false);
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
m_chatModel->setResponseValue(e.what());
|
m_chatModel->setResponseValue(e.what());
|
||||||
@ -733,8 +737,6 @@ auto Server::handleCompletionRequest(const CompletionRequest &request)
|
|||||||
auto Server::handleChatRequest(const ChatRequest &request)
|
auto Server::handleChatRequest(const ChatRequest &request)
|
||||||
-> std::pair<QHttpServerResponse, std::optional<QJsonObject>>
|
-> std::pair<QHttpServerResponse, std::optional<QJsonObject>>
|
||||||
{
|
{
|
||||||
auto *mySettings = MySettings::globalInstance();
|
|
||||||
|
|
||||||
ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
|
ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
|
||||||
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
|
||||||
for (const ModelInfo &info : modelList) {
|
for (const ModelInfo &info : modelList) {
|
||||||
@ -779,17 +781,20 @@ auto Server::handleChatRequest(const ChatRequest &request)
|
|||||||
}
|
}
|
||||||
auto startOffset = m_chatModel->appendResponseWithHistory(messages);
|
auto startOffset = m_chatModel->appendResponseWithHistory(messages);
|
||||||
|
|
||||||
// FIXME(jared): taking parameters from the UI inhibits reproducibility of results
|
std::unique_ptr<GenerationParams> genParams;
|
||||||
backend::GenerationParams genParams {
|
{
|
||||||
.n_predict = request.max_tokens,
|
using enum GenerationParam;
|
||||||
.top_k = mySettings->modelTopK(modelInfo),
|
QMap<GenerationParam, QVariant> values;
|
||||||
.top_p = request.top_p,
|
if (auto v = request.max_tokens ) values.insert(NPredict, *v);
|
||||||
.min_p = request.min_p,
|
if (auto v = request.temperature) values.insert(Temperature, *v);
|
||||||
.temp = request.temperature,
|
if (auto v = request.top_p ) values.insert(TopP, *v);
|
||||||
.n_batch = mySettings->modelPromptBatchSize(modelInfo),
|
if (auto v = request.min_p ) values.insert(MinP, *v);
|
||||||
.repeat_penalty = float(mySettings->modelRepeatPenalty(modelInfo)),
|
try {
|
||||||
.repeat_last_n = mySettings->modelRepeatPenaltyTokens(modelInfo),
|
genParams.reset(modelDescription()->makeGenerationParams(values));
|
||||||
};
|
} catch (const std::exception &e) {
|
||||||
|
throw InvalidRequestError(e.what());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int promptTokens = 0;
|
int promptTokens = 0;
|
||||||
int responseTokens = 0;
|
int responseTokens = 0;
|
||||||
@ -797,7 +802,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
|
|||||||
for (int i = 0; i < request.n; ++i) {
|
for (int i = 0; i < request.n; ++i) {
|
||||||
ChatPromptResult result;
|
ChatPromptResult result;
|
||||||
try {
|
try {
|
||||||
result = promptInternalChat(m_collections, genParams, startOffset);
|
result = promptInternalChat(m_collections, *genParams, startOffset);
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
m_chatModel->setResponseValue(e.what());
|
m_chatModel->setResponseValue(e.what());
|
||||||
m_chatModel->setError();
|
m_chatModel->setError();
|
||||||
|
164
gpt4all-chat/src/store_base.cpp
Normal file
164
gpt4all-chat/src/store_base.cpp
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
#include "store_base.h"
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
|
#include <gpt4all-backend/formatters.h> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <QByteArray>
|
||||||
|
#include <QDebug>
|
||||||
|
#include <QIODevice>
|
||||||
|
#include <QLatin1StringView> // IWYU pragma: keep
|
||||||
|
#include <QSaveFile>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QtAssert>
|
||||||
|
#include <QtLogging>
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <string>
|
||||||
|
#include <system_error>
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
namespace json = boost::json;
|
||||||
|
namespace sys = boost::system;
|
||||||
|
using namespace Qt::StringLiterals;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
DataStoreError::DataStoreError(const QFileDevice *file)
|
||||||
|
: m_error(file->error())
|
||||||
|
, m_errorString(file->errorString())
|
||||||
|
{
|
||||||
|
Q_ASSERT(file->error());
|
||||||
|
}
|
||||||
|
|
||||||
|
DataStoreError::DataStoreError(const boost::system::system_error &e)
|
||||||
|
: m_error(e.code())
|
||||||
|
, m_errorString(QString::fromUtf8(e.what()))
|
||||||
|
{
|
||||||
|
Q_ASSERT(e.code());
|
||||||
|
}
|
||||||
|
|
||||||
|
DataStoreError::DataStoreError(QString e)
|
||||||
|
: m_error()
|
||||||
|
, m_errorString(e)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void DataStoreError::raise() const
|
||||||
|
{
|
||||||
|
std::visit(Overloaded {
|
||||||
|
[&](QFileDevice::FileError e) { throw FileError(m_errorString, e); },
|
||||||
|
[&](boost::system::error_code e) { throw std::runtime_error(m_errorString.toUtf8().constData()); },
|
||||||
|
[&](std::monostate ) { throw std::runtime_error(m_errorString.toUtf8().constData()); },
|
||||||
|
}, m_error);
|
||||||
|
Q_UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Drop the in-memory cache and repopulate it from every JSON file in m_path.
/// Unreadable, unparsable, and duplicate entries are skipped with a warning.
auto DataStoreBase::reload() -> DataStoreResult<>
{
    if (auto res = clear(); !res)
        return res;

    json::stream_parser parser;
    QFile file;

    for (auto &entry : fs::directory_iterator(m_path)) {
        file.setFileName(entry.path());
        if (!file.open(QFile::ReadOnly)) {
            qWarning().noquote() << "skipping unopenable file:" << file.fileName();
            continue;
        }
        auto jv = read(file, parser);
        if (!jv) {
            // FIX: the message was missing the space before "because".
            (qWarning().nospace() << "skipping " << file.fileName() << " because of read error: ").noquote()
                << jv.error().errorString();
            // FIX: a failed read can leave a partial document in the parser;
            // discard it so the next file starts from a clean state.
            parser.reset();
        } else if (auto [unique, uuid] = insert(*jv); !unique)
            qWarning() << "skipping duplicate data store entry:" << uuid;
        file.close();
    }
    return {};
}
|
||||||
|
|
||||||
|
/// Point the store at a new directory, reloading its contents if the
/// directory actually changed.
auto DataStoreBase::setPath(fs::path path) -> DataStoreResult<>
{
    if (path == m_path)
        return {}; // no change -> nothing to do
    m_path = std::move(path);
    return reload();
}
|
||||||
|
|
||||||
|
auto DataStoreBase::getFilePath(const QString &name) -> std::filesystem::path
|
||||||
|
{ return m_path / fmt::format("{}.json", QLatin1StringView(normalizeName(name))); }
|
||||||
|
|
||||||
|
/// Create and open a brand-new JSON file for `name`.
/// Fails with file_exists if an entry by that name is already on disk.
auto DataStoreBase::openNew(const QString &name) -> DataStoreResult<std::unique_ptr<QFile>>
{
    auto path = getFilePath(name);
    auto file = std::make_unique<QFile>(path);
    // Pre-check gives a friendlier error; QFile::NewOnly below still guards
    // against a race with another creator.
    if (file->exists())
        return std::unexpected(sys::system_error(std::make_error_code(std::errc::file_exists), path.string()));
    if (!file->open(QFile::WriteOnly | QFile::NewOnly))
        return std::unexpected(file.get());
    return file;
}
|
||||||
|
|
||||||
|
/// Open an existing entry for atomic rewrite via QSaveFile.
/// Fails with no_such_file_or_directory if the entry is not on disk.
auto DataStoreBase::openExisting(const QString &name) -> DataStoreResult<std::unique_ptr<QSaveFile>>
{
    auto path = getFilePath(name);
    if (!QFile::exists(path))
        return std::unexpected(sys::system_error(
            std::make_error_code(std::errc::no_such_file_or_directory), path.string()
        ));
    auto file = std::make_unique<QSaveFile>(toQString(path));
    if (!file->open(QSaveFile::WriteOnly | QSaveFile::ExistingOnly))
        return std::unexpected(file.get());
    return file;
}
|
||||||
|
|
||||||
|
/// Read the whole device in JSON_BUFSIZ chunks and parse it as one JSON value.
/// Returns a DataStoreError on I/O failure or malformed JSON.
auto DataStoreBase::read(QFileDevice &file, boost::json::stream_parser &parser) -> DataStoreResult<boost::json::value>
{
    try {
        for (;;) {
            auto chunk = file.read(JSON_BUFSIZ);
            if (file.error())
                return std::unexpected(&file);
            if (chunk.isEmpty()) {
                Q_ASSERT(file.atEnd());
                break;
            }
            parser.write(chunk.data(), chunk.size());
        }
        return parser.release();
    } catch (const sys::system_error &e) {
        // FIX: stream_parser::write()/release() throw on malformed or
        // incomplete JSON; surface that through DataStoreResult instead of
        // letting the exception escape past a function that advertises
        // error-code semantics.
        return std::unexpected(e);
    }
}
|
||||||
|
|
||||||
|
/// Serialize `value` to `file` in fixed-size chunks, then flush.
auto DataStoreBase::write(const json::value &value, QFileDevice &file) -> DataStoreResult<>
{
    m_serializer.reset(&value);
    std::array<char, JSON_BUFSIZ> buf;

    while (!m_serializer.done()) {
        auto chunk = m_serializer.read(buf.data(), buf.size());
        qint64 nWritten = file.write(chunk.data(), chunk.size());
        if (nWritten < 0)
            return std::unexpected(&file);
        Q_ASSERT(nWritten == chunk.size()); // QFileDevice writes all or errors
    }

    if (!file.flush())
        return std::unexpected(&file);

    return {};
}
|
||||||
|
|
||||||
|
/// Turn a display name into a filesystem-safe byte string: lower-case it,
/// percent-encode everything unsafe, and escape the reserved "." / "..".
QByteArray DataStoreBase::normalizeName(const QString &name)
{
    auto encoded = QUrl::toPercentEncoding(
        name.toLower(), /*exclude*/ " !#$%&'()+,;=@[]^`{}"_ba, /*include*/ "~"_ba);

    // "." and ".." are special filenames on every platform
    if (encoded == "."_ba)
        return "%2E"_ba;
    if (encoded == ".."_ba)
        return "%2E%2E"_ba;
    return encoded;
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
119
gpt4all-chat/src/store_base.h
Normal file
119
gpt4all-chat/src/store_base.h
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "utils.h" // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <boost/json.hpp> // IWYU pragma: keep
|
||||||
|
#include <boost/system.hpp> // IWYU pragma: keep
|
||||||
|
#include <tl/generator.hpp>
|
||||||
|
|
||||||
|
#include <QFile>
|
||||||
|
#include <QFileDevice>
|
||||||
|
#include <QString>
|
||||||
|
#include <QUuid>
|
||||||
|
#include <QtTypes> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <expected>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <utility>
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
|
class QByteArray;
|
||||||
|
class QSaveFile;
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
class DataStoreError {
|
||||||
|
public:
|
||||||
|
using ErrorCode = std::variant<
|
||||||
|
QFileDevice::FileError,
|
||||||
|
boost::system::error_code,
|
||||||
|
std::monostate
|
||||||
|
>;
|
||||||
|
|
||||||
|
DataStoreError(const QFileDevice *file);
|
||||||
|
DataStoreError(const boost::system::system_error &e);
|
||||||
|
DataStoreError(QString e);
|
||||||
|
|
||||||
|
[[nodiscard]] const ErrorCode &error () const { return m_error; }
|
||||||
|
[[nodiscard]] const QString &errorString() const { return m_errorString; }
|
||||||
|
|
||||||
|
[[noreturn]] void raise() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
ErrorCode m_error;
|
||||||
|
QString m_errorString;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T = void>
using DataStoreResult = std::expected<T, DataStoreError>;

/// Non-template base of DataStore<T>: owns the on-disk directory and the
/// JSON (de)serialization plumbing. Subclasses supply the in-memory cache
/// via clear()/insert().
class DataStoreBase {
protected:
    explicit DataStoreBase(std::filesystem::path path)
        : m_path(std::move(path))
    {}

public:
    auto path() const -> const std::filesystem::path & { return m_path; }
    auto setPath(std::filesystem::path path) -> DataStoreResult<>;

protected:
    auto reload() -> DataStoreResult<>;
    virtual auto clear() -> DataStoreResult<> = 0;
    struct InsertResult { bool unique; QUuid uuid; };
    virtual InsertResult insert(const boost::json::value &jv) = 0;

    // helpers
    auto getFilePath(const QString &name) -> std::filesystem::path;
    auto openNew(const QString &name) -> DataStoreResult<std::unique_ptr<QFile>>;
    auto openExisting(const QString &name) -> DataStoreResult<std::unique_ptr<QSaveFile>>;
    static auto read(QFileDevice &file, boost::json::stream_parser &parser) -> DataStoreResult<boost::json::value>;
    auto write(const boost::json::value &value, QFileDevice &file) -> DataStoreResult<>;

private:
    static constexpr uint JSON_BUFSIZ = 16384; // default QFILE_WRITEBUFFER_SIZE

    static QByteArray normalizeName(const QString &name);

protected:
    std::filesystem::path m_path;

private:
    boost::json::serializer m_serializer;
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class DataStore : public DataStoreBase {
|
||||||
|
public:
|
||||||
|
explicit DataStore(std::filesystem::path path);
|
||||||
|
|
||||||
|
auto list() -> tl::generator<const T &>;
|
||||||
|
auto setData(T data) -> DataStoreResult<>;
|
||||||
|
auto remove(const QUuid &id) -> DataStoreResult<>;
|
||||||
|
|
||||||
|
auto acquire(QUuid id) -> DataStoreResult<const T *>;
|
||||||
|
auto release(const QUuid &id) -> DataStoreResult<>;
|
||||||
|
|
||||||
|
[[nodiscard]]
|
||||||
|
auto operator[](const QUuid &id) const -> const T &
|
||||||
|
{ return m_entries.at(id); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
auto createImpl(T data, const QString &name) -> DataStoreResult<const T *>;
|
||||||
|
auto clear() -> DataStoreResult<> final;
|
||||||
|
InsertResult insert(const boost::json::value &jv) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unordered_map<QUuid, T> m_entries;
|
||||||
|
std::unordered_set<QUuid> m_acquired;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
||||||
|
|
||||||
|
#include "store_base.inl" // IWYU pragma: export
|
128
gpt4all-chat/src/store_base.inl
Normal file
128
gpt4all-chat/src/store_base.inl
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
#include "json-helpers.h" // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <boost/json.hpp> // IWYU pragma: keep
|
||||||
|
#include <gpt4all-backend/json-helpers.h> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <QSaveFile>
|
||||||
|
#include <QtAssert>
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DataStore<T>::DataStore(std::filesystem::path path)
|
||||||
|
: DataStoreBase(std::move(path))
|
||||||
|
{
|
||||||
|
if (auto res = reload(); !res)
|
||||||
|
res.error().raise(); // should be impossible
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
auto DataStore<T>::list() -> tl::generator<const T &>
|
||||||
|
{
|
||||||
|
for (auto &[_, value] : m_entries)
|
||||||
|
co_yield value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Persist a brand-new entry to disk, cache it, and acquire it for the
/// caller. Returns a pointer to the cached copy.
template <typename T>
auto DataStore<T>::createImpl(T data, const QString &name) -> DataStoreResult<const T *>
{
    // acquire path
    auto file = openNew(name);
    if (!file)
        return std::unexpected(file.error());

    // serialize
    if (auto res = write(boost::json::value_from(data), **file); !res)
        return std::unexpected(res.error());

    // FIX: copy the id out *before* moving `data` into the map. The original
    // passed data.id and std::move(data) to the same emplace() call
    // (unspecified argument evaluation order) and then read data.id again
    // after the move.
    auto id = data.id;

    // insert
    auto [it, unique] = m_entries.emplace(id, std::move(data));
    Q_ASSERT(unique);

    // acquire data ownership
    if (auto res = acquire(id); !res)
        return std::unexpected(res.error());

    return &it->second;
}
|
||||||
|
|
||||||
|
/// Atomically rewrite an existing entry's file, then update the cache.
template <typename T>
auto DataStore<T>::setData(T data) -> DataStoreResult<>
{
    // FIX: verify the id up front. The original committed the file to disk
    // and only then called m_entries.at(), which throws std::out_of_range
    // for an unknown id — escaping the DataStoreResult contract *after*
    // already mutating the on-disk state.
    auto it = m_entries.find(data.id);
    if (it == m_entries.end())
        return std::unexpected(QStringLiteral("id not found: %1").arg(data.id.toString()));

    // acquire path
    auto file = openExisting(data.name);
    if (!file)
        return std::unexpected(file.error());

    // serialize
    if (auto res = write(boost::json::value_from(data), **file); !res)
        return std::unexpected(res.error());
    if (!(*file)->commit())
        return std::unexpected(file->get());

    // update
    it->second = std::move(data);
    return {};
}
|
||||||
|
|
||||||
|
/// Delete an entry's file from disk and drop it from the cache.
template <typename T>
auto DataStore<T>::remove(const QUuid &id) -> DataStoreResult<>
{
    // acquire UUID
    auto it = m_entries.find(id);
    if (it == m_entries.end())
        return std::unexpected(QStringLiteral("id not found: %1").arg(id.toString()));

    auto &[_, data] = *it;

    // remove the path
    auto path = getFilePath(data.name);
    QFile file(path);
    if (!file.remove())
        // FIX: was `throw std::unexpected(&file)`, which threw the wrapper
        // object itself instead of returning it through DataStoreResult.
        return std::unexpected(&file);

    // update cache
    m_entries.erase(it);
    return {};
}
|
||||||
|
|
||||||
|
/// Mark `id` as exclusively acquired and return a pointer to its entry.
/// Fails if the id is already acquired.
template <typename T>
auto DataStore<T>::acquire(QUuid id) -> DataStoreResult<const T *>
{
    // FIX: the original did insert(std::move(id)) and then read `id` on the
    // failure path — a use-after-move. QUuid is trivially cheap to copy, so
    // simply insert a copy and keep `id` usable.
    auto [it, unique] = m_acquired.insert(id);
    if (!unique)
        return std::unexpected(QStringLiteral("id already acquired: %1").arg(id.toString()));
    return &(*this)[*it];
}
|
||||||
|
|
||||||
|
/// Release a previously acquired id. Fails if it was not acquired.
template <typename T>
auto DataStore<T>::release(const QUuid &id) -> DataStoreResult<>
{
    auto nErased = m_acquired.erase(id);
    if (nErased == 0)
        return std::unexpected(QStringLiteral("id not acquired: %1").arg(id.toString()));
    return {};
}
|
||||||
|
|
||||||
|
/// Empty the in-memory cache. Refuses while any entry is still acquired,
/// since callers may hold pointers into m_entries.
template <typename T>
auto DataStore<T>::clear() -> DataStoreResult<>
{
    if (!m_acquired.empty())
        return std::unexpected(QStringLiteral("cannot clear data store with living references"));
    m_entries.clear();
    return {};
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
auto DataStore<T>::insert(const boost::json::value &jv) -> InsertResult
|
||||||
|
{
|
||||||
|
auto data = boost::json::value_to<T>(jv);
|
||||||
|
auto id = data.id;
|
||||||
|
auto [_, ok] = m_entries.emplace(id, std::move(data));
|
||||||
|
return { ok, std::move(id) };
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
25
gpt4all-chat/src/store_provider.cpp
Normal file
25
gpt4all-chat/src/store_provider.cpp
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#include "store_provider.h"
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
auto ProviderStore::create(QString name, QUrl base_url, QString api_key)
|
||||||
|
-> DataStoreResult<const ModelProviderData *>
|
||||||
|
{
|
||||||
|
ModelProviderData data { QUuid::createUuid(), ProviderType::openai, name, std::move(base_url),
|
||||||
|
OpenaiProviderDetails { std::move(api_key) } };
|
||||||
|
return createImpl(std::move(data), name);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ProviderStore::create(QString name, QUrl base_url)
|
||||||
|
-> DataStoreResult<const ModelProviderData *>
|
||||||
|
{
|
||||||
|
ModelProviderData data { QUuid::createUuid(), ProviderType::ollama, name, std::move(base_url) };
|
||||||
|
return createImpl(std::move(data), name);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
49
gpt4all-chat/src/store_provider.h
Normal file
49
gpt4all-chat/src/store_provider.h
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "store_base.h"
|
||||||
|
|
||||||
|
#include <boost/describe/class.hpp>
|
||||||
|
#include <boost/describe/enum.hpp>
|
||||||
|
|
||||||
|
#include <QString>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QUuid>
|
||||||
|
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
|
|
||||||
|
namespace gpt4all::ui {
|
||||||
|
|
||||||
|
|
||||||
|
BOOST_DEFINE_ENUM_CLASS(ProviderType, openai, ollama)

/// Provider-specific fields for OpenAI-compatible endpoints.
struct OpenaiProviderDetails {
    QString api_key;
};
BOOST_DESCRIBE_STRUCT(OpenaiProviderDetails, (), (api_key))

/// One configured model provider, as persisted to disk by ProviderStore.
struct ModelProviderData {
    using Details = std::variant<std::monostate, OpenaiProviderDetails>;
    QUuid        id;
    ProviderType type;
    QString      name;
    QUrl         base_url;
    Details      details;   // std::monostate for providers with no extras (ollama)
};
BOOST_DESCRIBE_STRUCT(ModelProviderData, (), (id, type, name, base_url, details))

/// Directory-backed store of model provider configurations.
class ProviderStore : public DataStore<ModelProviderData> {
private:
    using Super = DataStore<ModelProviderData>;

public:
    using Super::Super;

    /// OpenAI
    auto create(QString name, QUrl base_url, QString api_key) -> DataStoreResult<const ModelProviderData *>;
    /// Ollama
    auto create(QString name, QUrl base_url) -> DataStoreResult<const ModelProviderData *>;
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace gpt4all::ui
|
@ -1,16 +1,56 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <QFileDevice>
|
||||||
|
#include <QHash>
|
||||||
#include <QJsonValue>
|
#include <QJsonValue>
|
||||||
|
#include <QBitArray> // for qHash overload // IWYU pragma: keep
|
||||||
#include <QLatin1StringView> // IWYU pragma: keep
|
#include <QLatin1StringView> // IWYU pragma: keep
|
||||||
|
|
||||||
|
#include <concepts>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <functional>
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
|
#include <stdexcept>
|
||||||
#include <utility> // IWYU pragma: keep
|
#include <utility> // IWYU pragma: keep
|
||||||
|
|
||||||
// IWYU pragma: no_forward_declare QJsonValue
|
// IWYU pragma: no_forward_declare QJsonValue
|
||||||
class QJsonObject;
|
class QJsonObject;
|
||||||
|
class QVariant;
|
||||||
|
template <typename Key, typename T> class QMap;
|
||||||
|
|
||||||
|
|
||||||
// alternative to QJsonObject's initializer_list constructor that accepts Latin-1 strings
|
// alternative to QJsonObject's initializer_list constructor that accepts Latin-1 strings
|
||||||
QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringView, QJsonValue>> args);
|
QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringView, QJsonValue>> args);
|
||||||
|
|
||||||
|
QJsonObject &extend(QJsonObject &obj, const QMap<QLatin1StringView, QVariant> &values);
|
||||||
|
|
||||||
|
QString toQString(const std::filesystem::path &path);
|
||||||
|
auto toFSPath (const QString &str) -> std::filesystem::path;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
concept QHashable = requires(const T &x) {
|
||||||
|
{ qHash(x) } -> std::same_as<size_t>;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <QHashable T>
|
||||||
|
struct std::hash<T> {
|
||||||
|
size_t operator()(const T &value) const noexcept
|
||||||
|
{ return qHash(value); }
|
||||||
|
};
|
||||||
|
|
||||||
|
class FileError : public std::runtime_error {
|
||||||
|
public:
|
||||||
|
explicit FileError(const QFileDevice *file)
|
||||||
|
: FileError(file->errorString(), file->error()) {}
|
||||||
|
explicit FileError(const QString &str, QFileDevice::FileError code);
|
||||||
|
QFileDevice::FileError code() const noexcept { return m_code; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
QFileDevice::FileError m_code;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename... Ts>
|
||||||
|
struct Overloaded : Ts... { using Ts::operator()...; };
|
||||||
|
|
||||||
|
|
||||||
#include "utils.inl" // IWYU pragma: export
|
#include "utils.inl" // IWYU pragma: export
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
#include <QJsonObject>
|
#include <QJsonObject>
|
||||||
|
#include <QMap>
|
||||||
|
#include <QVariant>
|
||||||
|
#include <QtAssert>
|
||||||
|
|
||||||
|
|
||||||
inline QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringView, QJsonValue>> args)
|
inline QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringView, QJsonValue>> args)
|
||||||
@ -8,3 +11,34 @@ inline QJsonObject makeJsonObject(std::initializer_list<std::pair<QLatin1StringV
|
|||||||
obj.insert(arg.first, arg.second);
|
obj.insert(arg.first, arg.second);
|
||||||
return obj;
|
return obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Insert every key/value pair from `values` into `obj` (converting each
/// QVariant to a QJsonValue) and return `obj` for chaining.
inline QJsonObject &extend(QJsonObject &obj, const QMap<QLatin1StringView, QVariant> &values)
{
    for (const auto &[key, value] : values.asKeyValueRange())
        obj.insert(key, QJsonValue::fromVariant(value));
    return obj;
}
|
||||||
|
|
||||||
|
// copied from qfile.h
|
||||||
|
inline QString toQString(const std::filesystem::path &path)
|
||||||
|
{
|
||||||
|
#ifdef Q_OS_WIN
|
||||||
|
return QString::fromStdWString(path.native());
|
||||||
|
#else
|
||||||
|
return QString::fromStdString(path.native());
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// copied from qfile.h
|
||||||
|
inline auto toFSPath(const QString &str) -> std::filesystem::path
|
||||||
|
{
|
||||||
|
return { reinterpret_cast<const char16_t *>(str.cbegin()),
|
||||||
|
reinterpret_cast<const char16_t *>(str.cend ()) };
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a FileError from a message plus a nonzero QFileDevice error code.
// FIX: must be `inline` — this definition lives in a header-included .inl
// file, so a non-inline out-of-class definition violates the ODR as soon as
// two translation units include utils.h.
inline FileError::FileError(const QString &str, QFileDevice::FileError code)
    : std::runtime_error(str.toUtf8().constData())
    , m_code(code)
{
    Q_ASSERT(code);
}
|
||||||
|
Loading…
Reference in New Issue
Block a user