stuff is working now

Jared Van Bortel 2025-02-27 18:31:11 -05:00
parent 068845e1a2
commit d20cfbbec9
11 changed files with 346 additions and 146 deletions

View File

@@ -1,6 +1,9 @@
#include "config.h"
#include "pretty.h"
#include <QCoro/QCoroTask> // IWYU pragma: keep
#include <boost/json.hpp>
#include <fmt/base.h>
#include <gpt4all-backend/formatters.h> // IWYU pragma: keep
#include <gpt4all-backend/ollama-client.h>
@@ -14,35 +17,43 @@
#include <expected>
#include <variant>
namespace json = boost::json;
using namespace Qt::Literals::StringLiterals;
using gpt4all::backend::OllamaClient;
template <typename T>
static std::string to_json(const T &value)
{ return pretty_print(json::value_from(value)); }
static void run()
{
fmt::print("Connecting to server at {}\n", OLLAMA_URL);
OllamaClient provider(OLLAMA_URL);
auto versionResp = QCoro::waitFor(provider.getVersion());
auto versionResp = QCoro::waitFor(provider.version());
if (versionResp) {
fmt::print("Server version: {}\n", versionResp->version);
fmt::print("Version response: {}\n", to_json(*versionResp));
} else {
fmt::print("Error retrieving version: {}\n", versionResp.error().errorString);
return QCoreApplication::exit(1);
}
auto modelsResponse = QCoro::waitFor(provider.listModels());
auto modelsResponse = QCoro::waitFor(provider.list());
if (modelsResponse) {
fmt::print("Available models:\n");
for (const auto & model : modelsResponse->models)
fmt::print("{}\n", model.model);
if (!modelsResponse->models.empty())
fmt::print("First model: {}\n", to_json(modelsResponse->models.front()));
} else {
fmt::print("Error retrieving available models: {}\n", modelsResponse.error().errorString);
return QCoreApplication::exit(1);
}
auto showResponse = QCoro::waitFor(provider.showModelInfo({ .model = "DeepSeek-R1-Distill-Llama-70B-Q4_K_S" }));
auto showResponse = QCoro::waitFor(provider.show({ .model = "DeepSeek-R1-Distill-Llama-70B-Q4_K_S" }));
if (showResponse) {
fmt::print("Model family: {}\n", showResponse->details.family);
fmt::print("Show response: {}\n", to_json(*showResponse));
} else {
fmt::print("Error retrieving model info: {}\n", showResponse.error().errorString);
return QCoreApplication::exit(1);
@@ -51,7 +62,6 @@ static void run()
QCoreApplication::exit(0);
}
int main(int argc, char *argv[])
{
QCoreApplication app(argc, argv);
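The test drives the client synchronously through QCoro::waitFor; the same awaitables can also be consumed from another coroutine. A minimal sketch (not part of the commit), relying on the QString formatter from gpt4all-backend/formatters.h just as the test above does:

#include <QCoro/QCoroTask>
#include <fmt/format.h>
#include <gpt4all-backend/formatters.h> // assumed to provide the QString formatter, as in the test
#include <gpt4all-backend/ollama-client.h>

static QCoro::Task<> printVersion(gpt4all::backend::OllamaClient &provider)
{
    auto version = co_await provider.version();
    if (version)
        fmt::print("Server version: {}\n", version->version);
    else
        fmt::print("Error retrieving version: {}\n", version.error().errorString);
}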

View File

@@ -0,0 +1,95 @@
#pragma once
#include <boost/json.hpp>
#include <sstream>
#include <string>
inline void pretty_print( std::ostream& os, boost::json::value const& jv, std::string* indent = nullptr )
{
std::string indent_;
if(! indent)
indent = &indent_;
switch(jv.kind())
{
case boost::json::kind::object:
{
os << "{\n";
indent->append(4, ' ');
auto const& obj = jv.get_object();
if(! obj.empty())
{
auto it = obj.begin();
for(;;)
{
os << *indent << boost::json::serialize(it->key()) << ": ";
pretty_print(os, it->value(), indent);
if(++it == obj.end())
break;
os << ",\n";
}
}
os << "\n";
indent->resize(indent->size() - 4);
os << *indent << "}";
break;
}
case boost::json::kind::array:
{
os << "[\n";
indent->append(4, ' ');
auto const& arr = jv.get_array();
if(! arr.empty())
{
auto it = arr.begin();
for(;;)
{
os << *indent;
pretty_print( os, *it, indent);
if(++it == arr.end())
break;
os << ",\n";
}
}
os << "\n";
indent->resize(indent->size() - 4);
os << *indent << "]";
break;
}
case boost::json::kind::string:
{
os << boost::json::serialize(jv.get_string());
break;
}
case boost::json::kind::uint64:
case boost::json::kind::int64:
case boost::json::kind::double_:
os << jv;
break;
case boost::json::kind::bool_:
if(jv.get_bool())
os << "true";
else
os << "false";
break;
case boost::json::kind::null:
os << "null";
break;
}
if(indent->empty())
os << "\n";
}
inline std::string pretty_print( boost::json::value const& jv, std::string* indent = nullptr )
{
std::ostringstream ss;
pretty_print(ss, jv, indent);
return ss.str();
}
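A small usage sketch (assumption, not part of the commit) showing the output shape the test program's to_json() relies on: 4-space indentation and a trailing newline at the top level.

#include "pretty.h"
#include <boost/json.hpp>
#include <string>

static std::string demoPretty()
{
    auto jv = boost::json::parse(R"({"model":"llama3","size":1234})"); // hypothetical sample value
    return pretty_print(jv);
    // {
    //     "model": "llama3",
    //     "size": 1234
    // }
}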

View File

@@ -13,6 +13,7 @@ add_subdirectory(src)
target_sources(gpt4all-backend PUBLIC
FILE_SET public_headers TYPE HEADERS BASE_DIRS include FILES
include/gpt4all-backend/formatters.h
include/gpt4all-backend/json-helpers.h
include/gpt4all-backend/ollama-client.h
include/gpt4all-backend/ollama-types.h
)

View File

@@ -3,6 +3,8 @@
#include "ollama-types.h"
#include <QCoro/QCoroTask> // IWYU pragma: keep
#include <boost/json.hpp> // IWYU pragma: keep
#include <boost/system.hpp>
#include <QNetworkAccessManager>
#include <QNetworkReply>
@@ -15,13 +17,19 @@
#include <variant>
class QNetworkRequest;
namespace boost::json { class value; }
namespace gpt4all::backend {
struct ResponseError {
QNetworkReply::NetworkError error;
private:
using ErrorCode = std::variant<
QNetworkReply::NetworkError,
boost::system::error_code
>;
public:
ErrorCode error;
QString errorString;
ResponseError(const QNetworkReply *reply)
@@ -30,6 +38,13 @@ struct ResponseError {
{
assert(reply->error());
}
ResponseError(const boost::system::system_error &e)
: error(e.code())
, errorString(QString::fromUtf8(e.what()))
{
assert(e.code());
}
};
template <typename T>
@@ -45,40 +60,44 @@
const QUrl &baseUrl() const { return m_baseUrl; }
void getBaseUrl(QUrl value) { m_baseUrl = std::move(value); }
/// Returns the version of the Ollama server.
auto getVersion() -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>
/// Returns the Ollama server version as a string.
auto version() -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>
{ return get<ollama::VersionResponse>(QStringLiteral("version")); }
/// List models that are available locally.
auto listModels() -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>
{ return get<ollama::ModelsResponse>(QStringLiteral("tags")); }
/// Lists models that are available locally.
auto list() -> QCoro::Task<DataOrRespErr<ollama::ListResponse>>
{ return get<ollama::ListResponse>(QStringLiteral("tags")); }
/// Show details about a model including modelfile, template, parameters, license, and system prompt.
auto showModelInfo(const ollama::ModelInfoRequest &req) -> QCoro::Task<DataOrRespErr<ollama::ModelInfo>>
{ return post<ollama::ModelInfo>(QStringLiteral("show"), req); }
/// Obtains model information, including details, modelfile, license etc.
auto show(const ollama::ShowRequest &req) -> QCoro::Task<DataOrRespErr<ollama::ShowResponse>>
{ return post<ollama::ShowResponse>(QStringLiteral("show"), req); }
private:
QNetworkRequest makeRequest(const QString &path) const;
auto processResponse(QNetworkReply &reply) -> QCoro::Task<DataOrRespErr<boost::json::value>>;
template <typename Resp>
auto get(const QString &path) -> QCoro::Task<DataOrRespErr<Resp>>;
template <typename Resp, typename Req>
auto post(const QString &path, Req const &req) -> QCoro::Task<DataOrRespErr<Resp>>;
auto post(const QString &path, Req const &body) -> QCoro::Task<DataOrRespErr<Resp>>;
auto getJson(const QString &path) -> QCoro::Task<DataOrRespErr<boost::json::value>>;
auto postJson(const QString &path, const boost::json::value &req)
auto postJson(const QString &path, const boost::json::value &body)
-> QCoro::Task<DataOrRespErr<boost::json::value>>;
private:
QUrl m_baseUrl;
QString m_userAgent;
QNetworkAccessManager m_nam;
boost::json::stream_parser m_parser;
};
extern template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<ollama::VersionResponse>>;
extern template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<ollama::ModelsResponse>>;
extern template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<ollama::ListResponse>>;
extern template auto OllamaClient::post(const QString &, const ollama::ShowRequest &)
-> QCoro::Task<DataOrRespErr<ollama::ShowResponse>>;
extern template auto OllamaClient::post(const QString &, const ollama::ModelInfoRequest &)
-> QCoro::Task<DataOrRespErr<ollama::ModelInfo>>;
} // namespace gpt4all::backend
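With ResponseError::error now a std::variant, a caller can tell transport failures (QNetworkReply::NetworkError) from JSON decode failures (boost::system::error_code). A hedged caller-side sketch, not part of this commit:

#include <gpt4all-backend/ollama-client.h>
#include <fmt/format.h>
#include <variant>

static void report(const gpt4all::backend::ResponseError &err)
{
    if (std::holds_alternative<QNetworkReply::NetworkError>(err.error))
        fmt::print("network error {}\n", int(std::get<QNetworkReply::NetworkError>(err.error)));
    else
        fmt::print("JSON error: {}\n", std::get<boost::system::error_code>(err.error).message());
    fmt::print("details: {}\n", err.errorString.toStdString());
}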

View File

@@ -1,131 +1,128 @@
#pragma once
#ifdef G4A_BACKEND_IMPL
#include "json-helpers.h" // IWYU pragma: keep
#include <boost/describe/class.hpp>
# include <boost/describe/enum.hpp>
#endif
#include <boost/json.hpp> // IWYU pragma: keep
#include <QByteArray>
#include <QString>
#include <QtTypes>
#include <chrono>
#include <optional>
#include <vector>
namespace gpt4all::backend::ollama {
//
// basic types
//
/// Details about a model.
struct Time : std::chrono::sys_time<std::chrono::nanoseconds> {};
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, Time time);
Time tag_invoke(const boost::json::value_to_tag<Time> &, const boost::json::value &value);
/// ImageData represents the raw binary data of an image file.
struct ImageData : QByteArray {};
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const ImageData &image);
ImageData tag_invoke(const boost::json::value_to_tag<ImageData> &, const boost::json::value &value);
struct ModelDetails {
QString parent_model; /// The parent of the model.
QString format; /// The format of the model.
QString family; /// The family of the model.
std::vector<QString> families; /// The families of the model.
QString parameter_size; /// The size of the model's parameters.
QString quantization_level; /// The quantization level of the model.
QString parent_model;
QString format;
QString family;
std::vector<QString> families;
QString parameter_size;
QString quantization_level;
};
#ifdef G4A_BACKEND_IMPL
BOOST_DESCRIBE_STRUCT(ModelDetails, (), (parent_model, format, family, families, parameter_size, quantization_level))
#endif
/// A model available locally.
struct Model {
QString model; /// The model name.
QString modified_at; /// Model modification date.
quint64 size; /// Size of the model on disk.
QString digest; /// The model's digest.
ModelDetails details; /// The model's details.
/// ListModelResponse is a single model description in ListResponse.
struct ListModelResponse {
QString name;
QString model;
Time modified_at;
qint64 size; /// Size of the model on disk.
QString digest;
std::optional<ModelDetails> details;
};
#ifdef G4A_BACKEND_IMPL
BOOST_DESCRIBE_STRUCT(Model, (), (model, modified_at, size, digest, details))
#endif
BOOST_DESCRIBE_STRUCT(ListModelResponse, (), (model, modified_at, size, digest, details))
enum MessageRole {
system,
user,
assistant,
tool,
};
#ifdef G4A_BACKEND_IMPL
BOOST_DESCRIBE_ENUM(MessageRole, system, user, assistant, tool)
#endif
using ToolCallFunctionArguments = boost::json::object;
struct ToolCallFunction {
QString name; /// The name of the function to be called.
boost::json::object arguments; /// The arguments to pass to the function.
std::optional<int> index;
QString name;
ToolCallFunctionArguments arguments;
};
#ifdef G4A_BACKEND_IMPL
BOOST_DESCRIBE_STRUCT(ToolCallFunction, (), (name, arguments))
#endif
BOOST_DESCRIBE_STRUCT(ToolCallFunction, (), (index, name, arguments))
struct ToolCall {
ToolCallFunction function; /// The function the model wants to call.
ToolCallFunction function;
};
#ifdef G4A_BACKEND_IMPL
BOOST_DESCRIBE_STRUCT(ToolCall, (), (function))
#endif
/// A message in the chat endpoint
/// Message is a single message in a chat sequence. The message contains the
/// role ("system", "user", or "assistant"), the content and an optional list
/// of images.
struct Message {
MessageRole role; /// The role of the message
QString content; /// The content of the message
std::vector<QString> images; /// (optional) a list of Base64-encoded images to include in the message
std::vector<ToolCall> tool_calls; /// A list of tool calls the model wants to call.
QString role;
QString content;
std::optional<std::vector<ImageData>> images;
std::optional<std::vector<ToolCall>> tool_calls;
};
#ifdef G4A_BACKEND_IMPL
BOOST_DESCRIBE_STRUCT(Message, (), (role, content, images, tool_calls))
#endif
//
// request types
//
/// Request class for the show model info endpoint.
struct ModelInfoRequest {
QString model; /// The model name.
/// ShowRequest is the request passed to OllamaClient::show().
struct ShowRequest {
QString model;
std::optional<QString> system {};
std::optional<bool> verbose {};
std::optional<boost::json::object> options {};
};
#ifdef G4A_BACKEND_IMPL
BOOST_DESCRIBE_STRUCT(ModelInfoRequest, (), (model))
#endif
BOOST_DESCRIBE_STRUCT(ShowRequest, (), (model, system, verbose, options))
//
// response types
//
/// The response class for the version endpoint.
/// VersionResponse is the response from OllamaClient::version().
struct VersionResponse {
QString version; /// The version of the Ollama server.
};
#ifdef G4A_BACKEND_IMPL
BOOST_DESCRIBE_STRUCT(VersionResponse, (), (version))
#endif
/// Response class for the list models endpoint.
struct ModelsResponse {
std::vector<Model> models; /// List of models available locally.
};
#ifdef G4A_BACKEND_IMPL
BOOST_DESCRIBE_STRUCT(ModelsResponse, (), (models))
#endif
/// Details about a model including modelfile, template, parameters, license, and system prompt.
struct ModelInfo {
std::optional<QString> license; /// The model's license.
/// ShowResponse is the response from OllamaClient::show().
struct ShowResponse {
std::optional<QString> license;
std::optional<QString> modelfile; /// The modelfile associated with the model.
std::optional<QString> parameters; /// The model parameters.
std::optional<QString> parameters;
std::optional<QString> template_; /// The prompt template for the model.
std::optional<QString> system; /// The system prompt for the model.
ModelDetails details;
boost::json::object model_info;
std::optional<ModelDetails> details;
std::optional<std::vector<Message>> messages; /// The default messages for the model.
std::optional<boost::json::object> model_info;
std::optional<boost::json::object> projector_info;
std::optional<Time> modified_at;
};
#ifdef G4A_BACKEND_IMPL
ModelInfo tag_invoke(const boost::json::value_to_tag<ModelInfo> &, const boost::json::value &value);
#endif
void tag_invoke(const boost::json::value_from_tag &, boost::json::value &value, const ShowResponse &resp);
ShowResponse tag_invoke(const boost::json::value_to_tag<ShowResponse> &, const boost::json::value &value);
/// ListResponse is the response from OllamaClient::list().
struct ListResponse {
std::vector<ListModelResponse> models; /// List of available models.
};
BOOST_DESCRIBE_STRUCT(ListResponse, (), (models))
} // namespace gpt4all::backend::ollama
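The pattern throughout this header is plain structs annotated with BOOST_DESCRIBE_STRUCT, which is what lets OllamaClient move them into and out of boost::json::value. A minimal sketch of the request direction, under the assumption that the Boost.Describe annotations and the QString/std::optional converters expected from json-helpers.h (not shown in this diff) are enabled for the translation unit, as they are when this header is consumed inside the backend:

#include <gpt4all-backend/ollama-types.h>
#include <boost/json.hpp>

using namespace gpt4all::backend::ollama;

static boost::json::value makeShowBody()
{
    // Hypothetical model name; only .model is required, the optionals may stay empty.
    ShowRequest req { .model = QStringLiteral("llama3"), .verbose = true };
    return boost::json::value_from(req); // expected shape: {"model":"llama3","verbose":true,...}
}

The response direction is the inverse, boost::json::value_to<ShowResponse>(...), which the client performs in get()/post() after parsing the reply body.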

View File

@@ -7,7 +7,6 @@ add_library(${TARGET} STATIC
qt-json-stream.cpp
)
target_compile_features(${TARGET} PUBLIC cxx_std_23)
target_compile_definitions(${TARGET} PRIVATE G4A_BACKEND_IMPL)
gpt4all_add_warning_options(${TARGET})
target_include_directories(${TARGET} PRIVATE
.

View File

@@ -5,7 +5,6 @@
#include <QCoro/QCoroIODevice> // IWYU pragma: keep
#include <QCoro/QCoroNetworkReply> // IWYU pragma: keep
#include <boost/json.hpp> // IWYU pragma: keep
#include <QByteArray>
#include <QNetworkRequest>
@@ -22,24 +21,6 @@ namespace json = boost::json;
namespace gpt4all::backend {
static auto processResponse(QNetworkReply &reply) -> QCoro::Task<DataOrRespErr<json::value>>
{
if (reply.error())
co_return std::unexpected(&reply);
json::parser p;
auto coroReply = qCoro(reply);
do {
auto chunk = co_await coroReply.readAll();
if (reply.error())
co_return std::unexpected(&reply);
p.write(chunk.data(), chunk.size());
} while (!reply.atEnd());
co_return p.release();
}
QNetworkRequest OllamaClient::makeRequest(const QString &path) const
{
QNetworkRequest req(m_baseUrl.resolved(QUrl(path)));
@@ -47,29 +28,56 @@ QNetworkRequest OllamaClient::makeRequest(const QString &path) const
return req;
}
auto OllamaClient::processResponse(QNetworkReply &reply) -> QCoro::Task<DataOrRespErr<json::value>>
{
if (reply.error())
co_return std::unexpected(&reply);
auto coroReply = qCoro(reply);
do {
auto chunk = co_await coroReply.readAll();
if (reply.error())
co_return std::unexpected(&reply);
m_parser.write(chunk.data(), chunk.size());
} while (!reply.atEnd());
m_parser.finish();
co_return m_parser.release();
}
template <typename Resp>
auto OllamaClient::get(const QString &path) -> QCoro::Task<DataOrRespErr<Resp>>
{
// get() should not throw exceptions
try {
auto value = co_await getJson(path);
if (value)
co_return json::value_to<Resp>(*value);
co_return std::unexpected(value.error());
} catch (const boost::system::system_error &e) {
co_return std::unexpected(e);
}
}
template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<VersionResponse>>;
template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<ModelsResponse>>;
template auto OllamaClient::get(const QString &) -> QCoro::Task<DataOrRespErr<ListResponse>>;
template <typename Resp, typename Req>
auto OllamaClient::post(const QString &path, const Req &req) -> QCoro::Task<DataOrRespErr<Resp>>
auto OllamaClient::post(const QString &path, const Req &body) -> QCoro::Task<DataOrRespErr<Resp>>
{
auto reqJson = json::value_from(req);
// post() should not throw exceptions
try {
auto reqJson = json::value_from(body);
auto value = co_await postJson(path, reqJson);
if (value)
co_return json::value_to<Resp>(*value);
co_return std::unexpected(value.error());
} catch (const boost::system::system_error &e) {
co_return std::unexpected(e);
}
}
template auto OllamaClient::post(const QString &, const ModelInfoRequest &) -> QCoro::Task<DataOrRespErr<ModelInfo>>;
template auto OllamaClient::post(const QString &, const ShowRequest &) -> QCoro::Task<DataOrRespErr<ShowResponse>>;
auto OllamaClient::getJson(const QString &path) -> QCoro::Task<DataOrRespErr<json::value>>
{
@@ -77,12 +85,12 @@ auto OllamaClient::getJson(const QString &path) -> QCoro::Task<DataOrRespErr<json::value>>
co_return co_await processResponse(*reply);
}
auto OllamaClient::postJson(const QString &path, const json::value &req) -> QCoro::Task<DataOrRespErr<json::value>>
auto OllamaClient::postJson(const QString &path, const json::value &body) -> QCoro::Task<DataOrRespErr<json::value>>
{
JsonStreamDevice reqStream(&req);
std::unique_ptr<QNetworkReply> reply(
m_nam.post(makeRequest(path), &reqStream)
);
JsonStreamDevice stream(&body);
auto req = makeRequest(path);
req.setHeader(QNetworkRequest::ContentTypeHeader, "application/json"_ba);
std::unique_ptr<QNetworkReply> reply(m_nam.post(req, &stream));
co_return co_await processResponse(*reply);
}
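processResponse() now feeds each network chunk into the member boost::json::stream_parser and releases the finished document once the reply ends. A standalone illustration of that incremental-parse pattern (not commit code):

#include <boost/json.hpp>

static boost::json::value parseInChunks()
{
    boost::json::stream_parser p;
    p.write(R"({"ver)");           // first network chunk
    p.write(R"(sion":"0.5.7"})");  // second network chunk
    p.finish();                    // signal end of input
    return p.release();            // {"version":"0.5.7"}
}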

View File

@@ -2,30 +2,97 @@
#include "json-helpers.h"
#include <fmt/chrono.h> // IWYU pragma: keep
#include <fmt/format.h>
#include <sstream>
#include <string>
#include <system_error>
#include <type_traits>
#include <utility>
namespace json = boost::json;
template <typename T>
static T get_optional(const json::object &o, json::string_view key)
{
if (auto *p = o.if_contains(key))
return value_to<typename T::value_type>(*p);
return std::nullopt;
}
namespace gpt4all::backend::ollama {
ModelInfo tag_invoke(const boost::json::value_to_tag<ModelInfo> &, const boost::json::value &value)
void tag_invoke(const json::value_from_tag &, json::value &value, Time time)
{
value = json::value_from(fmt::format(
"{:%FT%T}Z",
static_cast<const std::chrono::sys_time<std::chrono::nanoseconds> &>(time)
));
}
Time tag_invoke(const json::value_to_tag<Time> &, const json::value &value)
{
namespace sys = boost::system;
Time time;
std::istringstream iss(json::string_view(value.as_string()));
iss >> std::chrono::parse("%FT%T%Ez", time);
if (!iss && !iss.eof())
throw sys::system_error(std::make_error_code(std::errc::invalid_argument), __func__);
return time;
}
void tag_invoke(const json::value_from_tag &, json::value &value, const ImageData &image)
{
auto base64 = image.toBase64();
value = json::value_from(json::string_view(base64.data(), base64.size()));
}
ImageData tag_invoke(const json::value_to_tag<ImageData> &, const json::value &value)
{
auto &str = value.as_string();
return ImageData(QByteArray::fromBase64(QByteArray::fromRawData(str.data(), str.size())));
}
void tag_invoke(const json::value_from_tag &, json::value &value, const ShowResponse &resp)
{
auto &o = value.emplace_object();
auto maybe_add = [&o](json::string_view key, auto &v) { if (v) o[key] = json::value_from(*v); };
maybe_add("license", resp.license );
maybe_add("modelfile", resp.modelfile );
maybe_add("parameters", resp.parameters );
maybe_add("template", resp.template_ );
maybe_add("system", resp.system );
maybe_add("details", resp.details );
maybe_add("messages", resp.messages );
maybe_add("model_info", resp.model_info );
maybe_add("projector_info", resp.projector_info);
maybe_add("modified_at", resp.modified_at );
}
ShowResponse tag_invoke(const json::value_to_tag<ShowResponse> &, const json::value &value)
{
using namespace json;
auto &o = value.as_object();
return {
#define T(name) std::remove_reference_t<decltype(std::declval<ModelInfo>().name)>
.license = value_to<T(license )>(o.at("license" )),
.modelfile = value_to<T(modelfile )>(o.at("modelfile" )),
.parameters = value_to<T(parameters)>(o.at("parameters")),
.template_ = value_to<T(template_ )>(o.at("template" )), // :(
.system = value_to<T(system )>(o.at("system" )),
.details = value_to<T(details )>(o.at("details" )),
.model_info = value_to<T(model_info)>(o.at("model_info")),
.messages = value_to<T(messages )>(o.at("messages" )),
#define T(name) std::remove_reference_t<decltype(std::declval<ShowResponse>().name)>
.license = get_optional<T(license )>(o, "license" ),
.modelfile = get_optional<T(modelfile )>(o, "modelfile" ),
.parameters = get_optional<T(parameters )>(o, "parameters" ),
.template_ = get_optional<T(template_ )>(o, "template" ), // :(
.system = get_optional<T(system )>(o, "system" ),
.details = get_optional<T(details )>(o, "details" ),
.messages = get_optional<T(messages )>(o, "messages" ),
.model_info = get_optional<T(model_info )>(o, "model_info" ),
.projector_info = get_optional<T(projector_info)>(o, "projector_info"),
.modified_at = get_optional<T(modified_at )>(o, "modified_at" ),
#undef T
};
}
} // namespace gpt4all::backend::ollama
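Through the tag_invoke overloads above, Time crosses the JSON boundary as an RFC 3339 timestamp string and ImageData as a base64 string. A hedged round-trip sketch for ImageData (illustration only, not part of the commit):

#include <gpt4all-backend/ollama-types.h>
#include <boost/json.hpp>
#include <QByteArray>
#include <cassert>

using gpt4all::backend::ollama::ImageData;

static void roundTripImage()
{
    ImageData img { QByteArray("\x89PNG", 4) };            // hypothetical image bytes
    boost::json::value v = boost::json::value_from(img);   // "iVBORw==" (base64 text)
    ImageData back = boost::json::value_to<ImageData>(v);  // original bytes restored
    assert(back == img);
}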

View File

@@ -10,6 +10,7 @@ namespace json = boost::json;
namespace gpt4all::backend {
JsonStreamDevice::JsonStreamDevice(const json::value *jv, QObject *parent)
: QIODevice(parent)
{
@@ -31,4 +32,5 @@ qint64 JsonStreamDevice::writeData(const char *data, qint64 maxSize)
return -1;
}
} // namespace gpt4all::backend

View File

@@ -10,6 +10,7 @@ class QObject;
namespace gpt4all::backend {
class JsonStreamDevice : public QIODevice
{
public:
@@ -25,4 +26,5 @@ private:
boost::json::serializer m_sr;
};
} // namespace gpt4all::backend