diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 2548efb3..d59d3116 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -14,5 +14,5 @@ target_sources(gpt4all-backend PUBLIC
     FILE_SET public_headers TYPE HEADERS BASE_DIRS include FILES
         include/gpt4all-backend/formatters.h
         include/gpt4all-backend/ollama-client.h
-        include/gpt4all-backend/ollama-responses.h
+        include/gpt4all-backend/ollama-types.h
 )
diff --git a/gpt4all-backend/include/gpt4all-backend/ollama-client.h b/gpt4all-backend/include/gpt4all-backend/ollama-client.h
index 60c9a4fd..2c956252 100644
--- a/gpt4all-backend/include/gpt4all-backend/ollama-client.h
+++ b/gpt4all-backend/include/gpt4all-backend/ollama-client.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "ollama-responses.h"
+#include "ollama-types.h"
 
 #include // IWYU pragma: keep
@@ -14,6 +14,7 @@
 #include
 #include
 
+class QNetworkRequest;
 
 namespace boost::json { class value; }
@@ -50,8 +51,9 @@ using DataOrRespErr = std::expected;
 class OllamaClient {
 public:
-    OllamaClient(QUrl baseUrl)
+    OllamaClient(QUrl baseUrl, QString userAgent = QStringLiteral("GPT4All"))
         : m_baseUrl(baseUrl)
+        , m_userAgent(std::move(userAgent))
     {}
 
     const QUrl &baseUrl() const { return m_baseUrl; }
@@ -59,24 +61,35 @@ public:
     /// Returns the version of the Ollama server.
     auto getVersion() -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>
-    { return getSimple<ollama::VersionResponse>(QStringLiteral("version")); }
+    { return get<ollama::VersionResponse>(QStringLiteral("version")); }
 
     /// List models that are available locally.
     auto listModels() -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>
-    { return getSimple<ollama::ModelsResponse>(QStringLiteral("tags")); }
+    { return get<ollama::ModelsResponse>(QStringLiteral("tags")); }
+
+    /// Show details about a model including modelfile, template, parameters, license, and system prompt.
+    auto showModelInfo(const ollama::ModelInfoRequest &req) -> QCoro::Task<DataOrRespErr<ollama::ModelInfo>>
+    { return post<ollama::ModelInfo>(QStringLiteral("show"), req); }
 
 private:
-    template <typename Resp>
-    auto getSimple(const QString &endpoint) -> QCoro::Task<DataOrRespErr<Resp>>;
+    QNetworkRequest makeRequest(const QString &path) const;
 
-    auto getSimpleGeneric(const QString &endpoint) -> QCoro::Task<DataOrRespErr<boost::json::value>>;
+    template <typename Resp>
+    auto get(const QString &path) -> QCoro::Task<DataOrRespErr<Resp>>;
+    template <typename Resp, typename Req>
+    auto post(const QString &path, Req const &req) -> QCoro::Task<DataOrRespErr<Resp>>;
+
+    auto getJson(const QString &path) -> QCoro::Task<DataOrRespErr<boost::json::value>>;
+    auto postJson(const QString &path, const boost::json::value &req)
+        -> QCoro::Task<DataOrRespErr<boost::json::value>>;
 
 private:
     QUrl m_baseUrl;
+    QString m_userAgent;
     QNetworkAccessManager m_nam;
 };
 
-extern template auto OllamaClient::getSimple<ollama::VersionResponse>(const QString &) -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>;
-extern template auto OllamaClient::getSimple<ollama::ModelsResponse>(const QString &) -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>;
+extern template auto OllamaClient::get<ollama::VersionResponse>(const QString &) -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>;
+extern template auto OllamaClient::get<ollama::ModelsResponse>(const QString &) -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>;
 
 } // namespace gpt4all::backend
diff --git a/gpt4all-backend/include/gpt4all-backend/ollama-responses.h b/gpt4all-backend/include/gpt4all-backend/ollama-responses.h
deleted file mode 100644
index 69e75207..00000000
--- a/gpt4all-backend/include/gpt4all-backend/ollama-responses.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#pragma once
-
-#include
-
-#include
-#include
-
-#include
-
-
-namespace gpt4all::backend::ollama {
-
-/// Details about a model.
-struct ModelDetails {
-    QString parent_model; /// The parent of the model.
-    QString format; /// The format of the model.
-    QString family; /// The family of the model.
-    std::vector<QString> families; /// The families of the model.
-    QString parameter_size; /// The size of the model's parameters.
-    QString quantization_level; /// The quantization level of the model.
-};
-BOOST_DESCRIBE_STRUCT(ModelDetails, (), (parent_model, format, family, families, parameter_size, quantization_level))
-
-/// A model available locally.
-struct Model {
-    QString model; /// The model name.
-    QString modified_at; /// Model modification date.
-    quint64 size; /// Size of the model on disk.
-    QString digest; /// The model's digest.
-    ModelDetails details; /// The model's details.
-};
-BOOST_DESCRIBE_STRUCT(Model, (), (model, modified_at, size, digest, details))
-
-
-/// The response class for the version endpoint.
-struct VersionResponse {
-    QString version; /// The version of the Ollama server.
-};
-BOOST_DESCRIBE_STRUCT(VersionResponse, (), (version))
-
-/// Response class for the list models endpoint.
-struct ModelsResponse {
-    std::vector<Model> models; /// List of models available locally.
-};
-BOOST_DESCRIBE_STRUCT(ModelsResponse, (), (models))
-
-} // namespace gpt4all::backend::ollama
diff --git a/gpt4all-backend/include/gpt4all-backend/ollama-types.h b/gpt4all-backend/include/gpt4all-backend/ollama-types.h
new file mode 100644
index 00000000..0a59fe8a
--- /dev/null
+++ b/gpt4all-backend/include/gpt4all-backend/ollama-types.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#ifdef G4A_BACKEND_IMPL
+# include
+# include
+#endif
+#include
+
+#include
+#include
+
+#include
+
+
+namespace gpt4all::backend::ollama {
+
+/// Details about a model.
+struct ModelDetails {
+    QString parent_model; /// The parent of the model.
+    QString format; /// The format of the model.
+    QString family; /// The family of the model.
+    std::vector<QString> families; /// The families of the model.
+    QString parameter_size; /// The size of the model's parameters.
+    QString quantization_level; /// The quantization level of the model.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ModelDetails, (), (parent_model, format, family, families, parameter_size, quantization_level))
+#endif
+
+/// A model available locally.
+struct Model {
+    QString model; /// The model name.
+    QString modified_at; /// Model modification date.
+    quint64 size; /// Size of the model on disk.
+    QString digest; /// The model's digest.
+    ModelDetails details; /// The model's details.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(Model, (), (model, modified_at, size, digest, details))
+#endif
+
+
+/// Request class for the show model info endpoint.
+struct ModelInfoRequest {
+    QString model; /// The model name.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ModelInfoRequest, (), (model))
+#endif
+
+enum MessageRole {
+    system,
+    user,
+    assistant,
+    tool,
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_ENUM(MessageRole, system, user, assistant, tool)
+#endif
+
+struct ToolCallFunction {
+    QString name; /// The name of the function to be called.
+    boost::json::object arguments; /// The arguments to pass to the function.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ToolCallFunction, (), (name, arguments))
+#endif
+
+struct ToolCall {
+    ToolCallFunction function; /// The function the model wants to call.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ToolCall, (), (function))
+#endif
+
+/// A message in the chat endpoint.
+struct Message {
+    MessageRole role; /// The role of the message.
+    QString content; /// The content of the message.
+    std::vector<QString> images; /// (optional) A list of Base64-encoded images to include in the message.
+    std::vector<ToolCall> tool_calls; /// A list of tool calls the model wants to make.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(Message, (), (role, content, images, tool_calls))
+#endif
+
+
+/// The response class for the version endpoint.
+struct VersionResponse {
+    QString version; /// The version of the Ollama server.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(VersionResponse, (), (version))
+#endif
+
+/// Response class for the list models endpoint.
+struct ModelsResponse {
+    std::vector<Model> models; /// List of models available locally.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ModelsResponse, (), (models))
+#endif
+
+/// Details about a model including modelfile, template, parameters, license, and system prompt.
+struct ModelInfo {
+    std::optional<QString> license; /// The model's license.
+    std::optional<QString> modelfile; /// The modelfile associated with the model.
+    std::optional<QString> parameters; /// The model parameters.
+    std::optional<QString> template_; /// The prompt template for the model.
+    std::optional<QString> system; /// The system prompt for the model.
+    ModelDetails details;
+    boost::json::object model_info;
+    std::optional<std::vector<Message>> messages; /// The default messages for the model.
+};
+
+#ifdef G4A_BACKEND_IMPL
+ModelInfo tag_invoke(const boost::json::value_to_tag<ModelInfo> &, const boost::json::value &value);
+#endif
+
+} // namespace gpt4all::backend::ollama
diff --git a/gpt4all-backend/src/CMakeLists.txt b/gpt4all-backend/src/CMakeLists.txt
index e4a70d8e..37c30390 100644
--- a/gpt4all-backend/src/CMakeLists.txt
+++ b/gpt4all-backend/src/CMakeLists.txt
@@ -3,8 +3,11 @@ set(TARGET gpt4all-backend)
 add_library(${TARGET} STATIC
     json-helpers.cpp
     ollama-client.cpp
+    ollama-types.cpp
+    qt-json-stream.cpp
 )
 target_compile_features(${TARGET} PUBLIC cxx_std_23)
+target_compile_definitions(${TARGET} PRIVATE G4A_BACKEND_IMPL)
 gpt4all_add_warning_options(${TARGET})
 target_include_directories(${TARGET} PRIVATE
     .
@@ -23,5 +26,5 @@ target_link_libraries(${TARGET} PRIVATE
 
 # link Boost::json as -isystem to suppress -Wundef
 get_target_property(LIB_INCLUDE_DIRS Boost::json INTERFACE_INCLUDE_DIRECTORIES)
-target_include_directories(${TARGET} SYSTEM PRIVATE ${LIB_INCLUDE_DIRS})
-target_link_libraries(${TARGET} PRIVATE Boost::json)
+target_include_directories(${TARGET} SYSTEM PUBLIC ${LIB_INCLUDE_DIRS})
+target_link_libraries(${TARGET} PUBLIC Boost::json)
diff --git a/gpt4all-backend/src/json-helpers.cpp b/gpt4all-backend/src/json-helpers.cpp
index 0907d749..b76f60bb 100644
--- a/gpt4all-backend/src/json-helpers.cpp
+++ b/gpt4all-backend/src/json-helpers.cpp
@@ -4,6 +4,14 @@
 #include
 
+namespace json = boost::json;
+
+
+void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QString &qstr)
+{
+    auto utf8 = qstr.toUtf8();
+    value = json::value_from(json::string_view(utf8.data(), utf8.size()));
+}
 
 QString tag_invoke(const boost::json::value_to_tag<QString> &, const boost::json::value &value)
 {
diff --git a/gpt4all-backend/src/json-helpers.h b/gpt4all-backend/src/json-helpers.h
index 5990a4cb..e72d2a52 100644
--- a/gpt4all-backend/src/json-helpers.h
+++ b/gpt4all-backend/src/json-helpers.h
@@ -3,9 +3,13 @@
 class QString;
 
 namespace boost::json {
     class value;
+    struct value_from_tag;
     template <typename T> struct value_to_tag;
 }
 
+/// Allows QString to be serialized to JSON.
+void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QString &qstr);
+
 /// Allows JSON strings to be deserialized as QString.
 QString tag_invoke(const boost::json::value_to_tag<QString> &, const boost::json::value &value);
diff --git a/gpt4all-backend/src/ollama-client.cpp b/gpt4all-backend/src/ollama-client.cpp
index 74e968c9..74029628 100644
--- a/gpt4all-backend/src/ollama-client.cpp
+++ b/gpt4all-backend/src/ollama-client.cpp
@@ -1,6 +1,7 @@
 #include "ollama-client.h"
 
 #include "json-helpers.h"
+#include "qt-json-stream.h"
 
 #include // IWYU pragma: keep
 #include // IWYU pragma: keep
@@ -21,35 +22,21 @@
 namespace json = boost::json;
 
 namespace gpt4all::backend {
 
-template <typename Resp>
-auto OllamaClient::getSimple(const QString &endpoint) -> QCoro::Task<DataOrRespErr<Resp>>
-{
-    auto value = co_await getSimpleGeneric(endpoint);
-    if (value)
-        co_return boost::json::value_to<Resp>(*value);
-    co_return std::unexpected(value.error());
-}
-template auto OllamaClient::getSimple<ollama::VersionResponse>(const QString &) -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>;
-template auto OllamaClient::getSimple<ollama::ModelsResponse>(const QString &) -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>;
-
-auto OllamaClient::getSimpleGeneric(const QString &endpoint) -> QCoro::Task<DataOrRespErr<boost::json::value>>
+static auto processResponse(QNetworkReply &reply) -> QCoro::Task<DataOrRespErr<json::value>>
 {
-    std::unique_ptr<QNetworkReply> reply(m_nam.get(
-        QNetworkRequest(m_baseUrl.resolved(QUrl(endpoint)))
-    ));
-    if (reply->error())
-        co_return std::unexpected(reply.get());
+    if (reply.error())
+        co_return std::unexpected(&reply);
 
     try {
         json::parser p;
-        auto coroReply = qCoro(*reply);
+        auto coroReply = qCoro(reply);
         do {
             auto chunk = co_await coroReply.readAll();
-            if (reply->error())
-                co_return std::unexpected(reply.get());
+            if (reply.error())
+                co_return std::unexpected(&reply);
             p.write(chunk.data(), chunk.size());
-        } while (!reply->atEnd());
+        } while (!reply.atEnd());
 
         co_return p.release();
     } catch (const std::exception &e) {
@@ -57,4 +44,50 @@ auto OllamaClient::getSimpleGeneric(const QString &endpoint) -> QCoro::Task
+QNetworkRequest OllamaClient::makeRequest(const QString &path) const
+{
+    QNetworkRequest req(m_baseUrl.resolved(QUrl(path)));
+    req.setHeader(QNetworkRequest::UserAgentHeader, m_userAgent);
+    return req;
+}
+
+template <typename Resp>
+auto OllamaClient::get(const QString &path) -> QCoro::Task<DataOrRespErr<Resp>>
+{
+    auto value = co_await getJson(path);
+    if (value)
+        co_return json::value_to<Resp>(*value);
+    co_return std::unexpected(value.error());
+}
+
+template auto OllamaClient::get<ollama::VersionResponse>(const QString &) -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>;
+template auto OllamaClient::get<ollama::ModelsResponse>(const QString &) -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>;
+
+template <typename Resp, typename Req>
+auto OllamaClient::post(const QString &path, const Req &req) -> QCoro::Task<DataOrRespErr<Resp>>
+{
+    auto reqJson = json::value_from(req);
+    auto value = co_await postJson(path, reqJson);
+    if (value)
+        co_return json::value_to<Resp>(*value);
+    co_return std::unexpected(value.error());
+}
+
+auto OllamaClient::getJson(const QString &path) -> QCoro::Task<DataOrRespErr<json::value>>
+{
+    std::unique_ptr<QNetworkReply> reply(m_nam.get(makeRequest(path)));
+    co_return co_await processResponse(*reply);
+}
+
+auto OllamaClient::postJson(const QString &path, const json::value &req) -> QCoro::Task<DataOrRespErr<json::value>>
+{
+    JsonStreamDevice reqStream(&req);
+    std::unique_ptr<QNetworkReply> reply(
+        m_nam.post(makeRequest(path), &reqStream)
+    );
+    co_return co_await processResponse(*reply);
+}
+
+
 } // namespace gpt4all::backend
diff --git a/gpt4all-backend/src/ollama-types.cpp b/gpt4all-backend/src/ollama-types.cpp
new file mode 100644
index 00000000..600a67ed
--- /dev/null
+++ b/gpt4all-backend/src/ollama-types.cpp
@@ -0,0 +1,31 @@
+#include "ollama-types.h"
+
+#include "json-helpers.h"
+
+#include
+#include
+
+namespace json = boost::json;
+
+
+namespace gpt4all::backend::ollama {
+
+ModelInfo tag_invoke(const boost::json::value_to_tag<ModelInfo> &, const boost::json::value &value)
+{
+    using namespace json;
+    auto &o = value.as_object();
+    return {
+#define T(name) std::remove_reference_t<decltype(std::declval<ModelInfo>().name)>
+        .license    = value_to<T(license   )>(o.at("license"   )),
+        .modelfile  = value_to<T(modelfile )>(o.at("modelfile" )),
+        .parameters = value_to<T(parameters)>(o.at("parameters")),
+        .template_  = value_to<T(template_ )>(o.at("template"  )), // :(
+        .system     = value_to<T(system    )>(o.at("system"    )),
+        .details    = value_to<T(details   )>(o.at("details"   )),
+        .model_info = value_to<T(model_info)>(o.at("model_info")),
+        .messages   = value_to<T(messages  )>(o.at("messages"  )),
+#undef T
+    };
+}
+
+} // namespace gpt4all::backend::ollama
diff --git a/gpt4all-backend/src/qt-json-stream.cpp b/gpt4all-backend/src/qt-json-stream.cpp
new file mode 100644
index 00000000..f44f5481
--- /dev/null
+++ b/gpt4all-backend/src/qt-json-stream.cpp
@@ -0,0 +1,30 @@
+#include "qt-json-stream.h"
+
+
+namespace json = boost::json;
+
+
+namespace gpt4all::backend {
+
+JsonStreamDevice::JsonStreamDevice(const json::value *jv, QObject *parent)
+    : QIODevice(parent)
+{
+    m_sr.reset(jv);
+    open(QIODevice::ReadOnly);
+}
+
+qint64 JsonStreamDevice::readData(char *data, qint64 maxSize)
+{
+    if (m_sr.done()) return 0;
+    auto chunk = m_sr.read(data, size_t(maxSize));
+    return qint64(chunk.size());
+}
+
+qint64 JsonStreamDevice::writeData(const char *data, qint64 maxSize)
+{
+    Q_UNUSED(data)
+    Q_UNUSED(maxSize)
+    return -1;
+}
+
+} // namespace gpt4all::backend
diff --git a/gpt4all-backend/src/qt-json-stream.h b/gpt4all-backend/src/qt-json-stream.h
new file mode 100644
index 00000000..e603abd0
--- /dev/null
+++ b/gpt4all-backend/src/qt-json-stream.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include
+
+#include
+#include
+#include
+
+
+namespace gpt4all::backend {
+
+class JsonStreamDevice : public QIODevice
+{
+public:
+    explicit JsonStreamDevice(const boost::json::value *jv, QObject *parent = nullptr);
+
+    bool isSequential() const override { return true; }
+
+protected:
+    qint64 readData(char *data, qint64 maxSize) override;
+    qint64 writeData(const char *data, qint64 maxSize) override;
+
+private:
+    boost::json::serializer m_sr;
+};
+
+} // namespace gpt4all::backend
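
Example usage of the new client API. This is a minimal sketch, not part of the change above; the base URL, model name, user-agent string, and the exact QCoro header name are assumptions.

// demo.cpp -- illustrative sketch only (assumes a local Ollama server on its
// default port and an application event loop that drives the QCoro task).
#include <gpt4all-backend/ollama-client.h>

#include <QCoroTask>
#include <QDebug>
#include <QUrl>

using namespace gpt4all::backend;

QCoro::Task<> demo()
{
    // Paths such as "version" are resolved against the base URL,
    // so the trailing slash matters.
    OllamaClient client(QUrl(QStringLiteral("http://localhost:11434/api/")),
                        QStringLiteral("MyApp/1.0"));

    auto version = co_await client.getVersion();
    if (version)
        qInfo() << "server version:" << version->version;

    auto info = co_await client.showModelInfo({ .model = QStringLiteral("llama3.2") });
    if (info && info->template_)
        qInfo() << "prompt template:" << *info->template_;
}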
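
JsonStreamDevice exists so that a POST body can be streamed straight out of Boost.JSON's serializer; QNetworkAccessManager::post() pulls bytes from the device via readData() rather than requiring the whole document as one contiguous buffer. A standalone sketch of that behavior (illustrative only):

// stream-demo.cpp -- drains the device the same way an upload would.
#include "qt-json-stream.h"

#include <boost/json.hpp>
#include <QDebug>

int main()
{
    auto jv = boost::json::parse(R"({"model": "llama3.2", "stream": false})");
    gpt4all::backend::JsonStreamDevice dev(&jv);
    qInfo() << dev.readAll(); // serialized chunk by chunk, not pre-buffered
    return 0;
}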
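
The new value_from overload in json-helpers is what lets request structs with QString fields (such as ModelInfoRequest) serialize through Boost.Describe without an intermediate std::string. A round-trip sketch (illustrative only):

// roundtrip.cpp
#include "json-helpers.h"

#include <boost/json.hpp>
#include <QString>

void roundTrip()
{
    // UTF-16 QString -> UTF-8 JSON string, then back again.
    boost::json::value v = boost::json::value_from(QString::fromUtf8("héllo"));
    QString s = boost::json::value_to<QString>(v); // s == "héllo"
}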