implement /api/show (not tested)

Jared Van Bortel 2025-02-26 19:47:25 -05:00
parent 85eaa41e6d
commit 7ce2ea57e0
11 changed files with 302 additions and 80 deletions

View File

@@ -14,5 +14,5 @@ target_sources(gpt4all-backend PUBLIC
     FILE_SET public_headers TYPE HEADERS BASE_DIRS include FILES
         include/gpt4all-backend/formatters.h
         include/gpt4all-backend/ollama-client.h
-        include/gpt4all-backend/ollama-responses.h
+        include/gpt4all-backend/ollama-types.h
 )

View File

@@ -1,6 +1,6 @@
 #pragma once
 
-#include "ollama-responses.h"
+#include "ollama-types.h"
 
 #include <QCoro/QCoroTask> // IWYU pragma: keep
@@ -14,6 +14,7 @@
 #include <utility>
 #include <variant>
 
+class QNetworkRequest;
 namespace boost::json { class value; }
@@ -50,8 +51,9 @@ using DataOrRespErr = std::expected<T, ResponseError>;
 class OllamaClient {
 public:
-    OllamaClient(QUrl baseUrl)
+    OllamaClient(QUrl baseUrl, QString m_userAgent = QStringLiteral("GPT4All"))
         : m_baseUrl(baseUrl)
+        , m_userAgent(std::move(m_userAgent))
     {}
 
     const QUrl &baseUrl() const { return m_baseUrl; }
@@ -59,24 +61,35 @@ public:
     /// Returns the version of the Ollama server.
     auto getVersion() -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>
-    { return getSimple<ollama::VersionResponse>(QStringLiteral("version")); }
+    { return get<ollama::VersionResponse>(QStringLiteral("version")); }
 
     /// List models that are available locally.
     auto listModels() -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>
-    { return getSimple<ollama::ModelsResponse>(QStringLiteral("tags")); }
+    { return get<ollama::ModelsResponse>(QStringLiteral("tags")); }
+
+    /// Show details about a model including modelfile, template, parameters, license, and system prompt.
+    auto showModelInfo(const ollama::ModelInfoRequest &req) -> QCoro::Task<DataOrRespErr<ollama::ModelInfo>>
+    { return post<ollama::ModelInfo>(QStringLiteral("show"), req); }
 
 private:
-    template <typename T>
-    auto getSimple(const QString &endpoint) -> QCoro::Task<DataOrRespErr<T>>;
+    QNetworkRequest makeRequest(const QString &path) const;
-    auto getSimpleGeneric(const QString &endpoint) -> QCoro::Task<DataOrRespErr<boost::json::value>>;
+
+    template <typename Resp>
+    auto get(const QString &path) -> QCoro::Task<DataOrRespErr<Resp>>;
+    template <typename Resp, typename Req>
+    auto post(const QString &path, Req const &req) -> QCoro::Task<DataOrRespErr<Resp>>;
+
+    auto getJson(const QString &path) -> QCoro::Task<DataOrRespErr<boost::json::value>>;
+    auto postJson(const QString &path, const boost::json::value &req)
+        -> QCoro::Task<DataOrRespErr<boost::json::value>>;
 
 private:
     QUrl m_baseUrl;
+    QString m_userAgent;
     QNetworkAccessManager m_nam;
 };
 
-extern template auto OllamaClient::getSimple(const QString &) -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>;
-extern template auto OllamaClient::getSimple(const QString &) -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>;
+extern template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>;
+extern template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>;
 
 } // namespace gpt4all::backend
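
For orientation, a minimal sketch (not part of this commit) of how the new showModelInfo endpoint could be driven from a QCoro coroutine; the model name and the printTemplate wrapper are hypothetical:

#include <gpt4all-backend/ollama-client.h>
#include <QCoro/QCoroTask>
#include <QTextStream>

using namespace gpt4all::backend;

QCoro::Task<> printTemplate(OllamaClient &client)
{
    // /api/show takes the model name and returns modelfile, template, etc.
    ollama::ModelInfoRequest req;
    req.model = QStringLiteral("llama3.2");

    auto info = co_await client.showModelInfo(req);
    if (!info)
        co_return; // info.error() carries the network or parse failure

    if (info->template_)
        QTextStream(stdout) << *info->template_ << '\n';
}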

View File

@ -1,47 +0,0 @@
#pragma once
#include <boost/describe/class.hpp>
#include <QString>
#include <QtTypes>
#include <vector>
namespace gpt4all::backend::ollama {
/// Details about a model.
struct ModelDetails {
QString parent_model; /// The parent of the model.
QString format; /// The format of the model.
QString family; /// The family of the model.
std::vector<QString> families; /// The families of the model.
QString parameter_size; /// The size of the model's parameters.
QString quantization_level; /// The quantization level of the model.
};
BOOST_DESCRIBE_STRUCT(ModelDetails, (), (parent_model, format, family, families, parameter_size, quantization_level))
/// A model available locally.
struct Model {
QString model; /// The model name.
QString modified_at; /// Model modification date.
quint64 size; /// Size of the model on disk.
QString digest; /// The model's digest.
ModelDetails details; /// The model's details.
};
BOOST_DESCRIBE_STRUCT(Model, (), (model, modified_at, size, digest, details))
/// The response class for the version endpoint.
struct VersionResponse {
QString version; /// The version of the Ollama server.
};
BOOST_DESCRIBE_STRUCT(VersionResponse, (), (version))
/// Response class for the list models endpoint.
struct ModelsResponse {
std::vector<Model> models; /// List of models available locally.
};
BOOST_DESCRIBE_STRUCT(ModelsResponse, (), (models))
} // namespace gpt4all::backend::ollama

View File

@@ -0,0 +1,120 @@
+#pragma once
+
+#ifdef G4A_BACKEND_IMPL
+#   include <boost/describe/class.hpp>
+#   include <boost/describe/enum.hpp>
+#endif
+#include <boost/json.hpp>
+
+#include <QString>
+#include <QtTypes>
+
+#include <optional>
+#include <vector>
+
+namespace gpt4all::backend::ollama {
+
+/// Details about a model.
+struct ModelDetails {
+    QString parent_model;          /// The parent of the model.
+    QString format;                /// The format of the model.
+    QString family;                /// The family of the model.
+    std::vector<QString> families; /// The families of the model.
+    QString parameter_size;        /// The size of the model's parameters.
+    QString quantization_level;    /// The quantization level of the model.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ModelDetails, (), (parent_model, format, family, families, parameter_size, quantization_level))
+#endif
+
+/// A model available locally.
+struct Model {
+    QString model;        /// The model name.
+    QString modified_at;  /// Model modification date.
+    quint64 size;         /// Size of the model on disk.
+    QString digest;       /// The model's digest.
+    ModelDetails details; /// The model's details.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(Model, (), (model, modified_at, size, digest, details))
+#endif
+
+/// Request class for the show model info endpoint.
+struct ModelInfoRequest {
+    QString model; /// The model name.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ModelInfoRequest, (), (model))
+#endif
+
+enum MessageRole {
+    system,
+    user,
+    assistant,
+    tool,
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_ENUM(MessageRole, system, user, assistant, tool)
+#endif
+
+struct ToolCallFunction {
+    QString name;                  /// The name of the function to be called.
+    boost::json::object arguments; /// The arguments to pass to the function.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ToolCallFunction, (), (name, arguments))
+#endif
+
+struct ToolCall {
+    ToolCallFunction function; /// The function the model wants to call.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ToolCall, (), (function))
+#endif
+
+/// A message in the chat endpoint.
+struct Message {
+    MessageRole role;                 /// The role of the message.
+    QString content;                  /// The content of the message.
+    std::vector<QString> images;      /// (optional) A list of Base64-encoded images to include in the message.
+    std::vector<ToolCall> tool_calls; /// A list of tool calls the model wants to make.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(Message, (), (role, content, images, tool_calls))
+#endif
+
+/// The response class for the version endpoint.
+struct VersionResponse {
+    QString version; /// The version of the Ollama server.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(VersionResponse, (), (version))
+#endif
+
+/// Response class for the list models endpoint.
+struct ModelsResponse {
+    std::vector<Model> models; /// List of models available locally.
+};
+#ifdef G4A_BACKEND_IMPL
+BOOST_DESCRIBE_STRUCT(ModelsResponse, (), (models))
+#endif
+
+/// Details about a model including modelfile, template, parameters, license, and system prompt.
+struct ModelInfo {
+    std::optional<QString> license;    /// The model's license.
+    std::optional<QString> modelfile;  /// The modelfile associated with the model.
+    std::optional<QString> parameters; /// The model parameters.
+    std::optional<QString> template_;  /// The prompt template for the model.
+    std::optional<QString> system;     /// The system prompt for the model.
+    ModelDetails details;              /// The model's details.
+    boost::json::object model_info;    /// Additional model metadata.
+    std::optional<std::vector<Message>> messages; /// The default messages for the model.
+};
+#ifdef G4A_BACKEND_IMPL
+ModelInfo tag_invoke(const boost::json::value_to_tag<ModelInfo> &, const boost::json::value &value);
+#endif
+
+} // namespace gpt4all::backend::ollama
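
These structs carry no hand-written JSON code: inside the backend build (where G4A_BACKEND_IMPL is defined), the Boost.Describe metadata lets boost::json convert them member-by-member, with the QString overloads from json-helpers.h handling the string fields. A rough sketch of what that enables, assuming the metadata and helpers are visible as they are inside the gpt4all-backend target:

// Hypothetical snippet, not in this commit.
ollama::ModelInfoRequest req{.model = QStringLiteral("llama3.2")};
boost::json::value jv = boost::json::value_from(req);
// boost::json::serialize(jv) == R"({"model":"llama3.2"})"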

View File

@@ -3,8 +3,11 @@ set(TARGET gpt4all-backend)
 add_library(${TARGET} STATIC
     json-helpers.cpp
     ollama-client.cpp
+    ollama-types.cpp
+    qt-json-stream.cpp
 )
 target_compile_features(${TARGET} PUBLIC cxx_std_23)
+target_compile_definitions(${TARGET} PRIVATE G4A_BACKEND_IMPL)
 gpt4all_add_warning_options(${TARGET})
 target_include_directories(${TARGET} PRIVATE
     .
@@ -23,5 +26,5 @@ target_link_libraries(${TARGET} PRIVATE
 # link Boost::json as -isystem to suppress -Wundef
 get_target_property(LIB_INCLUDE_DIRS Boost::json INTERFACE_INCLUDE_DIRECTORIES)
-target_include_directories(${TARGET} SYSTEM PRIVATE ${LIB_INCLUDE_DIRS})
-target_link_libraries(${TARGET} PRIVATE Boost::json)
+# PUBLIC now that ollama-types.h, a public header, includes <boost/json.hpp>
+target_include_directories(${TARGET} SYSTEM PUBLIC ${LIB_INCLUDE_DIRS})
+target_link_libraries(${TARGET} PUBLIC Boost::json)

View File

@@ -4,6 +4,14 @@
 #include <QString>
 
 namespace json = boost::json;
 
+void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QString &qstr)
+{
+    auto utf8 = qstr.toUtf8();
+    value = json::value_from(json::string_view(utf8.data(), utf8.size()));
+}
+
 QString tag_invoke(const boost::json::value_to_tag<QString> &, const boost::json::value &value)
 {

View File

@@ -3,9 +3,13 @@
 class QString;
 
 namespace boost::json {
     class value;
+    struct value_from_tag;
     template <typename T> struct value_to_tag;
 }
 
+/// Allows QString to be serialized to JSON.
+void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const QString &qstr);
+
 /// Allows JSON strings to be deserialized as QString.
 QString tag_invoke(const boost::json::value_to_tag<QString> &, const boost::json::value &value);
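
The pair of overloads gives a lossless UTF-8 round trip; a sketch of the intended use, assuming both declarations are in scope:

QString s = QStringLiteral("héllo");
boost::json::value jv = boost::json::value_from(s); // stored as a UTF-8 JSON string
QString back = boost::json::value_to<QString>(jv);  // back == s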

View File

@@ -1,6 +1,7 @@
 #include "ollama-client.h"
 
 #include "json-helpers.h"
+#include "qt-json-stream.h"
 
 #include <QCoro/QCoroIODevice> // IWYU pragma: keep
 #include <QCoro/QCoroNetworkReply> // IWYU pragma: keep
@@ -21,35 +22,21 @@ namespace json = boost::json;
 
 namespace gpt4all::backend {
 
-template <typename T>
-auto OllamaClient::getSimple(const QString &endpoint) -> QCoro::Task<DataOrRespErr<T>>
-{
-    auto value = co_await getSimpleGeneric(endpoint);
-    if (value)
-        co_return boost::json::value_to<T>(*value);
-    co_return std::unexpected(value.error());
-}
-
-template auto OllamaClient::getSimple(const QString &) -> QCoro::Task<DataOrRespErr<VersionResponse>>;
-template auto OllamaClient::getSimple(const QString &) -> QCoro::Task<DataOrRespErr<ModelsResponse>>;
-
-auto OllamaClient::getSimpleGeneric(const QString &endpoint) -> QCoro::Task<DataOrRespErr<json::value>>
+static auto processResponse(QNetworkReply &reply) -> QCoro::Task<DataOrRespErr<json::value>>
 {
-    std::unique_ptr<QNetworkReply> reply(m_nam.get(
-        QNetworkRequest(m_baseUrl.resolved(QUrl(endpoint)))
-    ));
-    if (reply->error())
-        co_return std::unexpected(reply.get());
+    if (reply.error())
+        co_return std::unexpected(&reply);
 
     try {
         json::parser p;
-        auto coroReply = qCoro(*reply);
+        auto coroReply = qCoro(reply);
         do {
             auto chunk = co_await coroReply.readAll();
-            if (reply->error())
-                co_return std::unexpected(reply.get());
+            if (reply.error())
+                co_return std::unexpected(&reply);
             p.write(chunk.data(), chunk.size());
-        } while (!reply->atEnd());
+        } while (!reply.atEnd());
 
         co_return p.release();
     } catch (const std::exception &e) {
@@ -57,4 +44,50 @@ auto OllamaClient::getSimpleGeneric(const QString &endpoint) -> QCoro::Task<Data
     }
 }
 
+QNetworkRequest OllamaClient::makeRequest(const QString &path) const
+{
+    QNetworkRequest req(m_baseUrl.resolved(QUrl(path)));
+    req.setHeader(QNetworkRequest::UserAgentHeader, m_userAgent);
+    return req;
+}
+
+template <typename Resp>
+auto OllamaClient::get(const QString &path) -> QCoro::Task<DataOrRespErr<Resp>>
+{
+    auto value = co_await getJson(path);
+    if (value)
+        co_return json::value_to<Resp>(*value);
+    co_return std::unexpected(value.error());
+}
+
+template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<VersionResponse>>;
+template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<ModelsResponse>>;
+
+template <typename Resp, typename Req>
+auto OllamaClient::post(const QString &path, const Req &req) -> QCoro::Task<DataOrRespErr<Resp>>
+{
+    auto reqJson = json::value_from(req);
+    auto value = co_await postJson(path, reqJson);
+    if (value)
+        co_return json::value_to<Resp>(*value);
+    co_return std::unexpected(value.error());
+}
+
+// NB: showModelInfo() in the header calls post<ModelInfo>(), whose definition is
+// only visible in this file, so it needs an explicit instantiation here to link.
+template auto OllamaClient::post(const QString &, const ModelInfoRequest &)
+    -> QCoro::Task<DataOrRespErr<ModelInfo>>;
+
+auto OllamaClient::getJson(const QString &path) -> QCoro::Task<DataOrRespErr<json::value>>
+{
+    std::unique_ptr<QNetworkReply> reply(m_nam.get(makeRequest(path)));
+    co_return co_await processResponse(*reply);
+}
+
+auto OllamaClient::postJson(const QString &path, const json::value &req) -> QCoro::Task<DataOrRespErr<json::value>>
+{
+    JsonStreamDevice reqStream(&req);
+    std::unique_ptr<QNetworkReply> reply(
+        m_nam.post(makeRequest(path), &reqStream)
+    );
+    co_return co_await processResponse(*reply);
+}
+
 } // namespace gpt4all::backend
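
processResponse feeds each chunk of the network reply into Boost.JSON's incremental parser instead of buffering the whole body; the same pattern in isolation, with hypothetical chunks:

#include <boost/json.hpp>
#include <string_view>

// json::parser accepts arbitrary partial input; release() yields the parsed
// value once the document is complete (and throws if it is not).
boost::json::value parseChunks(std::initializer_list<std::string_view> chunks)
{
    boost::json::parser p;
    for (auto chunk : chunks)
        p.write(chunk.data(), chunk.size());
    return p.release();
}

// parseChunks({R"({"ver)", R"(sion":"0.5.7"})"}) yields {"version":"0.5.7"}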

View File

@@ -0,0 +1,31 @@
+#include "ollama-types.h"
+
+#include "json-helpers.h"
+
+#include <type_traits>
+#include <utility>
+
+namespace json = boost::json;
+
+namespace gpt4all::backend::ollama {
+
+ModelInfo tag_invoke(const boost::json::value_to_tag<ModelInfo> &, const boost::json::value &value)
+{
+    using namespace json;
+    auto &o = value.as_object();
+    // NB: every key is assumed to be present; JSON null maps to std::nullopt for
+    // the optional members, but o.at() throws if a key is missing entirely.
+    return {
+#define T(name) std::remove_reference_t<decltype(std::declval<ModelInfo>().name)>
+        .license    = value_to<T(license   )>(o.at("license"   )),
+        .modelfile  = value_to<T(modelfile )>(o.at("modelfile" )),
+        .parameters = value_to<T(parameters)>(o.at("parameters")),
+        .template_  = value_to<T(template_ )>(o.at("template"  )), // :( "template" is a keyword
+        .system     = value_to<T(system    )>(o.at("system"    )),
+        .details    = value_to<T(details   )>(o.at("details"   )),
+        .model_info = value_to<T(model_info)>(o.at("model_info")),
+        .messages   = value_to<T(messages  )>(o.at("messages"  )),
+#undef T
+    };
+}
+
+} // namespace gpt4all::backend::ollama
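
This converter is hand-written because "template" is a C++ keyword, so BOOST_DESCRIBE_STRUCT cannot map that JSON key onto the template_ member. A sketch of the conversion it enables, using a hypothetical, trimmed /api/show payload (every key present, nulls allowed):

auto jv = boost::json::parse(R"({
    "license": null, "modelfile": null, "parameters": null,
    "template": "{{ .Prompt }}", "system": null,
    "details": {"parent_model": "", "format": "gguf", "family": "llama",
                "families": ["llama"], "parameter_size": "3B",
                "quantization_level": "Q4_0"},
    "model_info": {}, "messages": null
})");
auto info = boost::json::value_to<ollama::ModelInfo>(jv);
// info.template_ == "{{ .Prompt }}", info.license == std::nullopt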

View File

@@ -0,0 +1,30 @@
+#include "qt-json-stream.h"
+
+namespace json = boost::json;
+
+namespace gpt4all::backend {
+
+JsonStreamDevice::JsonStreamDevice(const json::value *jv, QObject *parent)
+    : QIODevice(parent)
+{
+    m_sr.reset(jv);
+    open(QIODevice::ReadOnly);
+}
+
+qint64 JsonStreamDevice::readData(char *data, qint64 maxSize)
+{
+    if (m_sr.done()) return 0;
+    auto chunk = m_sr.read(data, size_t(maxSize));
+    return qint64(chunk.size());
+}
+
+qint64 JsonStreamDevice::writeData(const char *data, qint64 maxSize)
+{
+    Q_UNUSED(data)
+    Q_UNUSED(maxSize)
+    return -1;
+}
+
+} // namespace gpt4all::backend

View File

@@ -0,0 +1,27 @@
+#pragma once
+
+#include <boost/json.hpp>
+
+#include <QIODevice>
+#include <QObject>
+#include <QtTypes>
+
+namespace gpt4all::backend {
+
+class JsonStreamDevice : public QIODevice
+{
+public:
+    explicit JsonStreamDevice(const boost::json::value *jv, QObject *parent = nullptr);
+
+    bool isSequential() const override { return true; }
+
+protected:
+    qint64 readData(char *data, qint64 maxSize) override;
+    qint64 writeData(const char *data, qint64 maxSize) override;
+
+private:
+    boost::json::serializer m_sr;
+};
+
+} // namespace gpt4all::backend
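
The device wraps boost::json::serializer so postJson can stream a request body to QNetworkAccessManager without first materializing the full serialized string. A standalone sketch (not in this commit) of reading it in chunks:

boost::json::value body = {{"model", "llama3.2"}};
gpt4all::backend::JsonStreamDevice dev(&body);

QByteArray out;
for (;;) {
    QByteArray chunk = dev.read(64); // readData() pulls up to 64 bytes from the serializer
    if (chunk.isEmpty())
        break;
    out += chunk;
}
// out == R"({"model":"llama3.2"})"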