diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index e0f6b6f8..994ecb93 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -107,6 +107,7 @@ endif() qt_add_executable(chat main.cpp + bravesearch.h bravesearch.cpp chat.h chat.cpp chatllm.h chatllm.cpp chatmodel.h chatlistmodel.h chatlistmodel.cpp @@ -120,6 +121,7 @@ qt_add_executable(chat modellist.h modellist.cpp mysettings.h mysettings.cpp network.h network.cpp + sourceexcerpt.h server.h server.cpp logger.h logger.cpp ${APP_ICON_RESOURCE} @@ -153,6 +155,7 @@ qt_add_qml_module(chat qml/ThumbsDownDialog.qml qml/Toast.qml qml/ToastManager.qml + qml/ToolSettings.qml qml/MyBusyIndicator.qml qml/MyButton.qml qml/MyCheckBox.qml diff --git a/gpt4all-chat/bravesearch.cpp b/gpt4all-chat/bravesearch.cpp new file mode 100644 index 00000000..a9c0df47 --- /dev/null +++ b/gpt4all-chat/bravesearch.cpp @@ -0,0 +1,221 @@ +#include "bravesearch.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Qt::Literals::StringLiterals; + +QPair> BraveSearch::search(const QString &apiKey, const QString &query, int topK, unsigned long timeout) +{ + QThread workerThread; + BraveAPIWorker worker; + worker.moveToThread(&workerThread); + connect(&worker, &BraveAPIWorker::finished, &workerThread, &QThread::quit, Qt::DirectConnection); + connect(this, &BraveSearch::request, &worker, &BraveAPIWorker::request, Qt::QueuedConnection); + workerThread.start(); + emit request(apiKey, query, topK); + workerThread.wait(timeout); + workerThread.quit(); + workerThread.wait(); + return worker.response(); +} + +void BraveAPIWorker::request(const QString &apiKey, const QString &query, int topK) +{ + m_topK = topK; + QUrl jsonUrl("https://api.search.brave.com/res/v1/web/search"); + QUrlQuery urlQuery; + urlQuery.addQueryItem("q", query); + jsonUrl.setQuery(urlQuery); + QNetworkRequest request(jsonUrl); + 
QSslConfiguration conf = request.sslConfiguration(); + conf.setPeerVerifyMode(QSslSocket::VerifyPeer); // keep certificate checks on: this request carries the API key + request.setSslConfiguration(conf); + + request.setRawHeader("X-Subscription-Token", apiKey.toUtf8()); +// request.setRawHeader("Accept-Encoding", "gzip"); + request.setRawHeader("Accept", "application/json"); + + m_networkManager = new QNetworkAccessManager(this); + QNetworkReply *reply = m_networkManager->get(request); + connect(qGuiApp, &QCoreApplication::aboutToQuit, reply, &QNetworkReply::abort); + connect(reply, &QNetworkReply::finished, this, &BraveAPIWorker::handleFinished); + connect(reply, &QNetworkReply::errorOccurred, this, &BraveAPIWorker::handleErrorOccurred); +} + +static QPair> cleanBraveResponse(const QByteArray& jsonResponse, qsizetype topK = 1) +{ + QJsonParseError err; + QJsonDocument document = QJsonDocument::fromJson(jsonResponse, &err); + if (err.error != QJsonParseError::NoError) { + qWarning() << "ERROR: Couldn't parse: " << jsonResponse << err.errorString(); + return QPair>(); + } + + QJsonObject searchResponse = document.object(); + QJsonObject cleanResponse; + QString query; + QJsonArray cleanArray; + + QList infos; + + if (searchResponse.contains("query")) { + QJsonObject queryObj = searchResponse["query"].toObject(); + if (queryObj.contains("original")) { + query = queryObj["original"].toString(); + } + } + + if (searchResponse.contains("mixed")) { + QJsonObject mixedResults = searchResponse["mixed"].toObject(); + QJsonArray mainResults = mixedResults["main"].toArray(); + + for (int i = 0; i < std::min(mainResults.size(), topK); ++i) { + QJsonObject m = mainResults[i].toObject(); + QString r_type = m["type"].toString(); + int idx = m["index"].toInt(); + QJsonObject resultsObject = searchResponse[r_type].toObject(); + QJsonArray resultsArray = resultsObject["results"].toArray(); + + QJsonValue cleaned; + SourceExcerpt info; + if (r_type == "web") { + // For web data - add a single output from the search + QJsonObject resultObj = 
resultsArray.at(idx).toObject(); // at(): an out-of-range index from the server yields an empty object + QStringList selectedKeys = {"type", "title", "url", "description", "date", "extra_snippets"}; + QJsonObject cleanedObj; + for (const auto& key : selectedKeys) { + if (resultObj.contains(key)) { + cleanedObj.insert(key, resultObj[key]); + } + } + + info.date = resultObj["date"].toString(); + info.text = resultObj["description"].toString(); // fixme + info.url = resultObj["url"].toString(); + QJsonObject meta_url = resultObj["meta_url"].toObject(); + info.favicon = meta_url["favicon"].toString(); + info.title = resultObj["title"].toString(); + + cleaned = cleanedObj; + } else if (r_type == "faq") { + // For faq data - take a list of all the questions & answers + QStringList selectedKeys = {"type", "question", "answer", "title", "url"}; + QJsonArray cleanedArray; + for (const auto& q : resultsArray) { + QJsonObject qObj = q.toObject(); + QJsonObject cleanedObj; + for (const auto& key : selectedKeys) { + if (qObj.contains(key)) { + cleanedObj.insert(key, qObj[key]); + } + } + cleanedArray.append(cleanedObj); + } + cleaned = cleanedArray; + } else if (r_type == "infobox") { + QJsonObject resultObj = resultsArray.at(idx).toObject(); // at(): tolerate an out-of-range index + QStringList selectedKeys = {"type", "title", "url", "description", "long_desc"}; + QJsonObject cleanedObj; + for (const auto& key : selectedKeys) { + if (resultObj.contains(key)) { + cleanedObj.insert(key, resultObj[key]); + } + } + cleaned = cleanedObj; + } else if (r_type == "videos") { + QStringList selectedKeys = {"type", "url", "title", "description", "date"}; + QJsonArray cleanedArray; + for (const auto& q : resultsArray) { + QJsonObject qObj = q.toObject(); + QJsonObject cleanedObj; + for (const auto& key : selectedKeys) { + if (qObj.contains(key)) { + cleanedObj.insert(key, qObj[key]); + } + } + cleanedArray.append(cleanedObj); + } + cleaned = cleanedArray; + } else if (r_type == "locations") { + QStringList selectedKeys = {"type", "title", "url", "description", "coordinates", "postal_address", 
"contact", "rating", "distance", "zoom_level"}; + QJsonArray cleanedArray; + for (const auto& q : resultsArray) { + QJsonObject qObj = q.toObject(); + QJsonObject cleanedObj; + for (const auto& key : selectedKeys) { + if (qObj.contains(key)) { + cleanedObj.insert(key, qObj[key]); + } + } + cleanedArray.append(cleanedObj); + } + cleaned = cleanedArray; + } else if (r_type == "news") { + QStringList selectedKeys = {"type", "title", "url", "description"}; + QJsonArray cleanedArray; + for (const auto& q : resultsArray) { + QJsonObject qObj = q.toObject(); + QJsonObject cleanedObj; + for (const auto& key : selectedKeys) { + if (qObj.contains(key)) { + cleanedObj.insert(key, qObj[key]); + } + } + cleanedArray.append(cleanedObj); + } + cleaned = cleanedArray; + } else { + cleaned = QJsonValue(); + } + + infos.append(info); + cleanArray.append(cleaned); + } + } + + cleanResponse.insert("query", query); + cleanResponse.insert("top_k", cleanArray); + QJsonDocument cleanedDoc(cleanResponse); + +// qDebug().noquote() << document.toJson(QJsonDocument::Indented); +// qDebug().noquote() << cleanedDoc.toJson(QJsonDocument::Indented); + + return qMakePair(cleanedDoc.toJson(QJsonDocument::Indented), infos); +} + +void BraveAPIWorker::handleFinished() +{ + QNetworkReply *jsonReply = qobject_cast<QNetworkReply *>(sender()); + Q_ASSERT(jsonReply); + + const QByteArray jsonData = jsonReply->readAll(); + if (jsonReply->error() == QNetworkReply::NoError && jsonReply->isFinished()) { + m_response = cleanBraveResponse(jsonData, m_topK); + } else { + qWarning() << "ERROR: Could not search brave" << jsonReply->error() << jsonReply->errorString() << jsonData; + } + jsonReply->deleteLater(); + // Wake BraveSearch::search() now; without this it blocks for the whole timeout even on success + emit finished(); +} + +void BraveAPIWorker::handleErrorOccurred(QNetworkReply::NetworkError code) +{ + QNetworkReply *reply = qobject_cast(sender()); + Q_ASSERT(reply); + qWarning().noquote() << "ERROR: BraveAPIWorker::handleErrorOccurred got HTTP Error" << code << 
"response:" + << reply->errorString(); + emit finished(); +} diff --git a/gpt4all-chat/bravesearch.h b/gpt4all-chat/bravesearch.h new file mode 100644 index 00000000..482b29a6 --- /dev/null +++ b/gpt4all-chat/bravesearch.h @@ -0,0 +1,51 @@ +#ifndef BRAVESEARCH_H +#define BRAVESEARCH_H + +#include "sourceexcerpt.h" + +#include +#include +#include +#include + +class BraveAPIWorker : public QObject { + Q_OBJECT +public: + BraveAPIWorker() + : QObject(nullptr) + , m_networkManager(nullptr) + , m_topK(1) {} + virtual ~BraveAPIWorker() {} + + QPair> response() const { return m_response; } + +public Q_SLOTS: + void request(const QString &apiKey, const QString &query, int topK); + +Q_SIGNALS: + void finished(); + +private Q_SLOTS: + void handleFinished(); + void handleErrorOccurred(QNetworkReply::NetworkError code); + +private: + QNetworkAccessManager *m_networkManager; + QPair> m_response; + int m_topK; +}; + +class BraveSearch : public QObject { + Q_OBJECT +public: + BraveSearch() + : QObject(nullptr) {} + virtual ~BraveSearch() {} + + QPair> search(const QString &apiKey, const QString &query, int topK, unsigned long timeout = 2000); + +Q_SIGNALS: + void request(const QString &apiKey, const QString &query, int topK); +}; + +#endif // BRAVESEARCH_H diff --git a/gpt4all-chat/chat.cpp b/gpt4all-chat/chat.cpp index a44022c0..7da8274c 100644 --- a/gpt4all-chat/chat.cpp +++ b/gpt4all-chat/chat.cpp @@ -59,6 +59,7 @@ void Chat::connectLLM() connect(m_llmodel, &ChatLLM::responseChanged, this, &Chat::handleResponseChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::promptProcessing, this, &Chat::promptProcessing, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::generatingQuestions, this, &Chat::generatingQuestions, Qt::QueuedConnection); + connect(m_llmodel, &ChatLLM::toolCalled, this, &Chat::toolCalled, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::responseStopped, this, &Chat::responseStopped, Qt::QueuedConnection); connect(m_llmodel, 
&ChatLLM::modelLoadingError, this, &Chat::handleModelLoadingError, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::modelLoadingWarning, this, &Chat::modelLoadingWarning, Qt::QueuedConnection); @@ -67,7 +68,7 @@ void Chat::connectLLM() connect(m_llmodel, &ChatLLM::generatedQuestionFinished, this, &Chat::generatedQuestionFinished, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::loadedModelInfoChanged, this, &Chat::loadedModelInfoChanged, Qt::QueuedConnection); - connect(m_llmodel, &ChatLLM::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection); + connect(m_llmodel, &ChatLLM::sourceExcerptsChanged, this, &Chat::handleSourceExcerptsChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::modelInfoChanged, this, &Chat::handleModelInfoChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::trySwitchContextOfLoadedModelCompleted, this, &Chat::handleTrySwitchContextOfLoadedModelCompleted, Qt::QueuedConnection); @@ -121,6 +122,7 @@ void Chat::resetResponseState() emit tokenSpeedChanged(); m_responseInProgress = true; m_responseState = m_collections.empty() ? Chat::PromptProcessing : Chat::LocalDocsRetrieval; + m_toolDescription = QString(); emit responseInProgressChanged(); emit responseStateChanged(); } @@ -134,7 +136,7 @@ void Chat::prompt(const QString &prompt) void Chat::regenerateResponse() { const int index = m_chatModel->count() - 1; - m_chatModel->updateSources(index, QList()); + m_chatModel->updateSources(index, QList()); emit regenerateResponseRequested(); } @@ -189,8 +191,13 @@ void Chat::handleModelLoadingPercentageChanged(float loadingPercentage) void Chat::promptProcessing() { - m_responseState = !databaseResults().isEmpty() ? 
Chat::LocalDocsProcessing : Chat::PromptProcessing; - emit responseStateChanged(); + if (sourceExcerpts().isEmpty()) + m_responseState = Chat::PromptProcessing; + else if (m_responseState == Chat::ToolCalled) + m_responseState = Chat::ToolProcessing; + else + m_responseState = Chat::LocalDocsProcessing; + emit responseStateChanged(); } void Chat::generatingQuestions() @@ -199,6 +206,14 @@ void Chat::generatingQuestions() emit responseStateChanged(); } +void Chat::toolCalled(const QString &description) +{ + m_responseState = Chat::ToolCalled; + m_toolDescription = description; + emit toolDescriptionChanged(); + emit responseStateChanged(); +} + void Chat::responseStopped(qint64 promptResponseMs) { m_tokenSpeed = QString(); @@ -357,11 +372,11 @@ QString Chat::fallbackReason() const return m_llmodel->fallbackReason(); } -void Chat::handleDatabaseResultsChanged(const QList &results) +void Chat::handleSourceExcerptsChanged(const QList &sourceExcerpts) { - m_databaseResults = results; + m_sourceExcerpts = sourceExcerpts; const int index = m_chatModel->count() - 1; - m_chatModel->updateSources(index, m_databaseResults); + m_chatModel->updateSources(index, m_sourceExcerpts); } void Chat::handleModelInfoChanged(const ModelInfo &modelInfo) diff --git a/gpt4all-chat/chat.h b/gpt4all-chat/chat.h index 065c624e..8f2b7410 100644 --- a/gpt4all-chat/chat.h +++ b/gpt4all-chat/chat.h @@ -40,6 +40,7 @@ class Chat : public QObject // 0=no, 1=waiting, 2=working Q_PROPERTY(int trySwitchContextInProgress READ trySwitchContextInProgress NOTIFY trySwitchContextInProgressChanged) Q_PROPERTY(QList generatedQuestions READ generatedQuestions NOTIFY generatedQuestionsChanged) + Q_PROPERTY(QString toolDescription READ toolDescription NOTIFY toolDescriptionChanged) QML_ELEMENT QML_UNCREATABLE("Only creatable from c++!") @@ -50,7 +51,9 @@ public: LocalDocsProcessing, PromptProcessing, GeneratingQuestions, - ResponseGeneration + ResponseGeneration, + ToolCalled, + ToolProcessing }; 
Q_ENUM(ResponseState) @@ -81,9 +84,10 @@ public: Q_INVOKABLE void stopGenerating(); Q_INVOKABLE void newPromptResponsePair(const QString &prompt); - QList databaseResults() const { return m_databaseResults; } + QList sourceExcerpts() const { return m_sourceExcerpts; } QString response() const; + QString toolDescription() const { return m_toolDescription; } bool responseInProgress() const { return m_responseInProgress; } ResponseState responseState() const; ModelInfo modelInfo() const; @@ -158,19 +162,21 @@ Q_SIGNALS: void trySwitchContextInProgressChanged(); void loadedModelInfoChanged(); void generatedQuestionsChanged(); + void toolDescriptionChanged(); private Q_SLOTS: void handleResponseChanged(const QString &response); void handleModelLoadingPercentageChanged(float); void promptProcessing(); void generatingQuestions(); + void toolCalled(const QString &description); void responseStopped(qint64 promptResponseMs); void generatedNameChanged(const QString &name); void generatedQuestionFinished(const QString &question); void handleRestoringFromText(); void handleModelLoadingError(const QString &error); void handleTokenSpeedChanged(const QString &tokenSpeed); - void handleDatabaseResultsChanged(const QList &results); + void handleSourceExcerptsChanged(const QList &sourceExcerpts); void handleModelInfoChanged(const ModelInfo &modelInfo); void handleTrySwitchContextOfLoadedModelCompleted(int value); @@ -185,6 +191,7 @@ private: QString m_device; QString m_fallbackReason; QString m_response; + QString m_toolDescription; QList m_collections; QList m_generatedQuestions; ChatModel *m_chatModel; @@ -192,7 +199,7 @@ private: ResponseState m_responseState; qint64 m_creationDate; ChatLLM *m_llmodel; - QList m_databaseResults; + QList m_sourceExcerpts; bool m_isServer = false; bool m_shouldDeleteLater = false; float m_modelLoadingPercentage = 0.0f; diff --git a/gpt4all-chat/chatlistmodel.cpp b/gpt4all-chat/chatlistmodel.cpp index b4afb39f..c5be4338 100644 --- 
a/gpt4all-chat/chatlistmodel.cpp +++ b/gpt4all-chat/chatlistmodel.cpp @@ -19,7 +19,7 @@ #include #define CHAT_FORMAT_MAGIC 0xF5D553CC -#define CHAT_FORMAT_VERSION 9 +#define CHAT_FORMAT_VERSION 10 class MyChatListModel: public ChatListModel { }; Q_GLOBAL_STATIC(MyChatListModel, chatListModelInstance) diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index e9fb7f31..a6dea395 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -1,5 +1,6 @@ #include "chatllm.h" +#include "bravesearch.h" #include "chat.h" #include "chatapi.h" #include "localdocs.h" @@ -10,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -113,6 +115,7 @@ ChatLLM::ChatLLM(Chat *parent, bool isServer) , m_reloadingToChangeVariant(false) , m_processedSystemPrompt(false) , m_restoreStateFromText(false) + , m_maybeToolCall(false) { moveToThread(&m_llmThread); connect(this, &ChatLLM::shouldBeLoadedChanged, this, &ChatLLM::handleShouldBeLoadedChanged, @@ -702,13 +705,44 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response) return false; } + // Only valid for llama 3.1 instruct + if (m_modelInfo.filename().startsWith("Meta-Llama-3.1-8B-Instruct")) { + // Based on https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#built-in-python-based-tool-calling + // For brave_search and wolfram_alpha ipython is always used + + // <|python_tag|> + // brave_search.call(query="...") + // <|eom_id|> + const int eom_id = 128008; + const int python_tag = 128010; + + // If we have a built-in tool call, then it should be the first token + const bool isFirstResponseToken = m_promptResponseTokens == m_promptTokens; + Q_ASSERT(token != python_tag || isFirstResponseToken); + if (isFirstResponseToken && token == python_tag) { + m_maybeToolCall = true; + ++m_promptResponseTokens; + return !m_stopGenerating; + } + + // Check for end of built-in tool call + Q_ASSERT(token != eom_id || m_maybeToolCall); // eom_id is only expected after python_tag opened a tool call + if (token == eom_id) { + 
++m_promptResponseTokens; + return false; + } + } + // m_promptResponseTokens is related to last prompt/response not // the entire context window which we can reset on regenerate prompt ++m_promptResponseTokens; m_timer->inc(); Q_ASSERT(!response.empty()); m_response.append(response); - emit responseChanged(QString::fromStdString(remove_leading_whitespace(m_response))); + + if (!m_maybeToolCall) + emit responseChanged(QString::fromStdString(remove_leading_whitespace(m_response))); + return !m_stopGenerating; } @@ -735,24 +769,24 @@ bool ChatLLM::prompt(const QList &collectionList, const QString &prompt } bool ChatLLM::promptInternal(const QList &collectionList, const QString &prompt, const QString &promptTemplate, - int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty, - int32_t repeat_penalty_tokens) + int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty, + int32_t repeat_penalty_tokens) { if (!isModelLoaded()) return false; - QList databaseResults; + QList databaseResults; const int retrievalSize = MySettings::globalInstance()->localDocsRetrievalSize(); if (!collectionList.isEmpty()) { emit requestRetrieveFromDB(collectionList, prompt, retrievalSize, &databaseResults); // blocks - emit databaseResultsChanged(databaseResults); + emit sourceExcerptsChanged(databaseResults); } // Augment the prompt template with the results if any QString docsContext; if (!databaseResults.isEmpty()) { QStringList results; - for (const ResultInfo &info : databaseResults) + for (const SourceExcerpt &info : databaseResults) results << u"Collection: %1\nPath: %2\nExcerpt: %3"_s.arg(info.collection, info.path, info.text); // FIXME(jared): use a Jinja prompt template instead of hardcoded Alpaca-style localdocs template @@ -797,21 +831,66 @@ bool ChatLLM::promptInternal(const QList &collectionList, const QString m_timer->stop(); qint64 elapsed = totalTime.elapsed(); std::string 
trimmed = trim_whitespace(m_response); - if (trimmed != m_response) { - m_response = trimmed; - emit responseChanged(QString::fromStdString(m_response)); - } + if (m_maybeToolCall) { + m_maybeToolCall = false; + m_ctx.n_past = std::max(0, m_ctx.n_past); + m_ctx.tokens.erase(m_ctx.tokens.end() - m_promptResponseTokens, m_ctx.tokens.end()); + m_promptResponseTokens = 0; + m_promptTokens = 0; + m_response = std::string(); + return toolCallInternal(QString::fromStdString(trimmed), n_predict, top_k, top_p, min_p, temp, + n_batch, repeat_penalty, repeat_penalty_tokens); + } else { + if (trimmed != m_response) { + m_response = trimmed; + emit responseChanged(QString::fromStdString(m_response)); + } - SuggestionMode mode = MySettings::globalInstance()->suggestionMode(); - if (mode == SuggestionMode::On || (!databaseResults.isEmpty() && mode == SuggestionMode::LocalDocsOnly)) - generateQuestions(elapsed); - else - emit responseStopped(elapsed); + SuggestionMode mode = MySettings::globalInstance()->suggestionMode(); + if (mode == SuggestionMode::On || (!databaseResults.isEmpty() && mode == SuggestionMode::LocalDocsOnly)) + generateQuestions(elapsed); + else + emit responseStopped(elapsed); + } m_pristineLoadedState = false; return true; } +bool ChatLLM::toolCallInternal(const QString &toolCall, int32_t n_predict, int32_t top_k, float top_p, + float min_p, float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens) +{ + Q_ASSERT(m_modelInfo.filename().startsWith("Meta-Llama-3.1-8B-Instruct")); + emit toolCalled(tr("searching web...")); + + // Based on https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#built-in-python-based-tool-calling + // For brave_search and wolfram_alpha ipython is always used + + static QRegularExpression re(R"(brave_search\.call\(query=\"([^\"]+)\"\))"); + QRegularExpressionMatch match = re.match(toolCall); + + QString 
prompt("<|start_header_id|>ipython<|end_header_id|>\n\n%1<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n%2"); + QString query; + if (match.hasMatch()) { + query = match.captured(1); + } else { + qWarning() << "WARNING: Could not find the tool for " << toolCall; + return promptInternal(QList()/*collectionList*/, prompt.arg(QString()), QString("%1") /*promptTemplate*/, + n_predict, top_k, top_p, min_p, temp, n_batch, repeat_penalty, repeat_penalty_tokens); + } + + const QString apiKey = MySettings::globalInstance()->braveSearchAPIKey(); + Q_ASSERT(apiKey != ""); + + BraveSearch brave; + const QPair> braveResponse = brave.search(apiKey, query, 2 /*topK*/, 2000 /*msecs to timeout*/); + + emit sourceExcerptsChanged(braveResponse.second); + + return promptInternal(QList()/*collectionList*/, prompt.arg(braveResponse.first), QString("%1") /*promptTemplate*/, + n_predict, top_k, top_p, min_p, temp, n_batch, repeat_penalty, repeat_penalty_tokens); +} + void ChatLLM::setShouldBeLoaded(bool b) { #if defined(DEBUG_MODEL_LOADING) diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h index d123358a..31e99c71 100644 --- a/gpt4all-chat/chatllm.h +++ b/gpt4all-chat/chatllm.h @@ -180,6 +180,7 @@ Q_SIGNALS: void responseChanged(const QString &response); void promptProcessing(); void generatingQuestions(); + void toolCalled(const QString &description); void responseStopped(qint64 promptResponseMs); void generatedNameChanged(const QString &name); void generatedQuestionFinished(const QString &generatedQuestion); @@ -188,17 +189,19 @@ Q_SIGNALS: void shouldBeLoadedChanged(); void trySwitchContextRequested(const ModelInfo &modelInfo); void trySwitchContextOfLoadedModelCompleted(int value); - void requestRetrieveFromDB(const QList &collections, const QString &text, int retrievalSize, QList *results); + void requestRetrieveFromDB(const QList &collections, const QString &text, int retrievalSize, QList *results); void reportSpeed(const QString &speed); void 
reportDevice(const QString &device); void reportFallbackReason(const QString &fallbackReason); - void databaseResultsChanged(const QList&); + void sourceExcerptsChanged(const QList&); void modelInfoChanged(const ModelInfo &modelInfo); protected: bool promptInternal(const QList &collectionList, const QString &prompt, const QString &promptTemplate, int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens); + bool toolCallInternal(const QString &toolcall, int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty, + int32_t repeat_penalty_tokens); bool handlePrompt(int32_t token); bool handleResponse(int32_t token, const std::string &response); bool handleNamePrompt(int32_t token); @@ -239,11 +242,13 @@ private: bool m_reloadingToChangeVariant; bool m_processedSystemPrompt; bool m_restoreStateFromText; + bool m_maybeToolCall; // m_pristineLoadedState is set if saveSate is unnecessary, either because: // - an unload was queued during LLModel::restoreState() // - the chat will be restored from text and hasn't been interacted with yet bool m_pristineLoadedState = false; QVector> m_stateFromText; + QNetworkAccessManager m_networkManager; // FIXME REMOVE }; #endif // CHATLLM_H diff --git a/gpt4all-chat/chatmodel.h b/gpt4all-chat/chatmodel.h index f061ccf7..7982d014 100644 --- a/gpt4all-chat/chatmodel.h +++ b/gpt4all-chat/chatmodel.h @@ -28,8 +28,8 @@ struct ChatItem Q_PROPERTY(bool stopped MEMBER stopped) Q_PROPERTY(bool thumbsUpState MEMBER thumbsUpState) Q_PROPERTY(bool thumbsDownState MEMBER thumbsDownState) - Q_PROPERTY(QList sources MEMBER sources) - Q_PROPERTY(QList consolidatedSources MEMBER consolidatedSources) + Q_PROPERTY(QList sources MEMBER sources) + Q_PROPERTY(QList consolidatedSources MEMBER consolidatedSources) public: // TODO: Maybe we should include the model name here as well as timestamp? 
@@ -38,8 +38,8 @@ public: QString value; QString prompt; QString newResponse; - QList sources; - QList consolidatedSources; + QList sources; + QList consolidatedSources; bool currentResponse = false; bool stopped = false; bool thumbsUpState = false; @@ -200,20 +200,20 @@ public: } } - QList consolidateSources(const QList &sources) { - QMap groupedData; - for (const ResultInfo &info : sources) { + QList consolidateSources(const QList &sources) { + QMap groupedData; + for (const SourceExcerpt &info : sources) { if (groupedData.contains(info.file)) { groupedData[info.file].text += "\n---\n" + info.text; } else { groupedData[info.file] = info; } } - QList consolidatedSources = groupedData.values(); + QList consolidatedSources = groupedData.values(); return consolidatedSources; } - Q_INVOKABLE void updateSources(int index, const QList &sources) + Q_INVOKABLE void updateSources(int index, const QList &sources) { if (index < 0 || index >= m_chatItems.size()) return; @@ -274,7 +274,7 @@ public: stream << c.thumbsDownState; if (version > 7) { stream << c.sources.size(); - for (const ResultInfo &info : c.sources) { + for (const SourceExcerpt &info : c.sources) { Q_ASSERT(!info.file.isEmpty()); stream << info.collection; stream << info.path; @@ -286,12 +286,16 @@ public: stream << info.page; stream << info.from; stream << info.to; + if (version > 9) { + stream << info.url; + stream << info.favicon; + } } } else if (version > 2) { QList references; QList referencesContext; int validReferenceNumber = 1; - for (const ResultInfo &info : c.sources) { + for (const SourceExcerpt &info : c.sources) { if (info.file.isEmpty()) continue; @@ -345,9 +349,9 @@ public: if (version > 7) { qsizetype count; stream >> count; - QList sources; + QList sources; for (int i = 0; i < count; ++i) { - ResultInfo info; + SourceExcerpt info; stream >> info.collection; stream >> info.path; stream >> info.file; @@ -358,6 +362,10 @@ public: stream >> info.page; stream >> info.from; stream >> info.to; + if 
(version > 9) { + stream >> info.url; + stream >> info.favicon; + } sources.append(info); } c.sources = sources; @@ -369,7 +377,7 @@ public: stream >> referencesContext; if (!references.isEmpty()) { - QList sources; + QList sources; QList referenceList = references.split("\n"); // Ignore empty lines and those that begin with "---" which is no longer used @@ -384,7 +392,7 @@ public: for (int j = 0; j < referenceList.size(); ++j) { QString reference = referenceList[j]; QString context = referencesContext[j]; - ResultInfo info; + SourceExcerpt info; QTextStream refStream(&reference); QString dummy; int validReferenceNumber; diff --git a/gpt4all-chat/database.cpp b/gpt4all-chat/database.cpp index 02ab5364..f2fc5094 100644 --- a/gpt4all-chat/database.cpp +++ b/gpt4all-chat/database.cpp @@ -1938,7 +1938,7 @@ QList Database::searchEmbeddings(const std::vector &query, const QLi } void Database::retrieveFromDB(const QList &collections, const QString &text, int retrievalSize, - QList *results) + QList *results) { #if defined(DEBUG) qDebug() << "retrieveFromDB" << collections << text << retrievalSize; @@ -1974,7 +1974,7 @@ void Database::retrieveFromDB(const QList &collections, const QString & const int from = q.value(8).toInt(); const int to = q.value(9).toInt(); const QString collectionName = q.value(10).toString(); - ResultInfo info; + SourceExcerpt info; info.collection = collectionName; info.path = document_path; info.file = file; diff --git a/gpt4all-chat/database.h b/gpt4all-chat/database.h index 90312290..fd0a78d9 100644 --- a/gpt4all-chat/database.h +++ b/gpt4all-chat/database.h @@ -2,6 +2,7 @@ #define DATABASE_H #include "embllm.h" // IWYU pragma: keep +#include "sourceexcerpt.h" #include #include @@ -49,64 +50,6 @@ struct DocumentInfo } }; -struct ResultInfo { - Q_GADGET - Q_PROPERTY(QString collection MEMBER collection) - Q_PROPERTY(QString path MEMBER path) - Q_PROPERTY(QString file MEMBER file) - Q_PROPERTY(QString title MEMBER title) - Q_PROPERTY(QString author 
MEMBER author) - Q_PROPERTY(QString date MEMBER date) - Q_PROPERTY(QString text MEMBER text) - Q_PROPERTY(int page MEMBER page) - Q_PROPERTY(int from MEMBER from) - Q_PROPERTY(int to MEMBER to) - Q_PROPERTY(QString fileUri READ fileUri STORED false) - -public: - QString collection; // [Required] The name of the collection - QString path; // [Required] The full path - QString file; // [Required] The name of the file, but not the full path - QString title; // [Optional] The title of the document - QString author; // [Optional] The author of the document - QString date; // [Required] The creation or the last modification date whichever is latest - QString text; // [Required] The text actually used in the augmented context - int page = -1; // [Optional] The page where the text was found - int from = -1; // [Optional] The line number where the text begins - int to = -1; // [Optional] The line number where the text ends - - QString fileUri() const { - // QUrl reserved chars that are not UNSAFE_PATH according to glib/gconvert.c - static const QByteArray s_exclude = "!$&'()*+,/:=@~"_ba; - - Q_ASSERT(!QFileInfo(path).isRelative()); -#ifdef Q_OS_WINDOWS - Q_ASSERT(!path.contains('\\')); // Qt normally uses forward slash as path separator -#endif - - auto escaped = QString::fromUtf8(QUrl::toPercentEncoding(path, s_exclude)); - if (escaped.front() != '/') - escaped = '/' + escaped; - return u"file://"_s + escaped; - } - - bool operator==(const ResultInfo &other) const { - return file == other.file && - title == other.title && - author == other.author && - date == other.date && - text == other.text && - page == other.page && - from == other.from && - to == other.to; - } - bool operator!=(const ResultInfo &other) const { - return !(*this == other); - } -}; - -Q_DECLARE_METATYPE(ResultInfo) - struct CollectionItem { // -- Fields persisted to database -- @@ -158,7 +101,7 @@ public Q_SLOTS: void forceRebuildFolder(const QString &path); bool addFolder(const QString &collection, 
const QString &path, const QString &embedding_model); void removeFolder(const QString &collection, const QString &path); - void retrieveFromDB(const QList &collections, const QString &text, int retrievalSize, QList *results); + void retrieveFromDB(const QList &collections, const QString &text, int retrievalSize, QList *results); void changeChunkSize(int chunkSize); void changeFileExtensions(const QStringList &extensions); @@ -225,7 +168,7 @@ private: QStringList m_scannedFileExtensions; QTimer *m_scanTimer; QMap> m_docsToScan; - QList m_retrieve; + QList m_retrieve; QThread m_dbThread; QFileSystemWatcher *m_watcher; QSet m_watchedPaths; diff --git a/gpt4all-chat/mysettings.cpp b/gpt4all-chat/mysettings.cpp index 525ccc1e..57f94ca2 100644 --- a/gpt4all-chat/mysettings.cpp +++ b/gpt4all-chat/mysettings.cpp @@ -456,6 +456,7 @@ bool MySettings::localDocsUseRemoteEmbed() const { return getBasicSetting QString MySettings::localDocsNomicAPIKey() const { return getBasicSetting("localdocs/nomicAPIKey" ).toString(); } QString MySettings::localDocsEmbedDevice() const { return getBasicSetting("localdocs/embedDevice" ).toString(); } QString MySettings::networkAttribution() const { return getBasicSetting("network/attribution" ).toString(); } +QString MySettings::braveSearchAPIKey() const { return getBasicSetting("bravesearch/APIKey" ).toString(); } ChatTheme MySettings::chatTheme() const { return ChatTheme (getEnumSetting("chatTheme", chatThemeNames)); } FontSize MySettings::fontSize() const { return FontSize (getEnumSetting("fontSize", fontSizeNames)); } @@ -474,6 +475,7 @@ void MySettings::setLocalDocsUseRemoteEmbed(bool value) { setBasic void MySettings::setLocalDocsNomicAPIKey(const QString &value) { setBasicSetting("localdocs/nomicAPIKey", value, "localDocsNomicAPIKey"); } void MySettings::setLocalDocsEmbedDevice(const QString &value) { setBasicSetting("localdocs/embedDevice", value, "localDocsEmbedDevice"); } void MySettings::setNetworkAttribution(const QString &value) { 
setBasicSetting("network/attribution", value, "networkAttribution"); } +void MySettings::setBraveSearchAPIKey(const QString &value) { setBasicSetting("bravesearch/APIKey", value, "braveSearchAPIKey"); } void MySettings::setChatTheme(ChatTheme value) { setBasicSetting("chatTheme", chatThemeNames .value(int(value))); } void MySettings::setFontSize(FontSize value) { setBasicSetting("fontSize", fontSizeNames .value(int(value))); } diff --git a/gpt4all-chat/mysettings.h b/gpt4all-chat/mysettings.h index 3db8b234..d4aa4c5a 100644 --- a/gpt4all-chat/mysettings.h +++ b/gpt4all-chat/mysettings.h @@ -72,6 +72,7 @@ class MySettings : public QObject Q_PROPERTY(int networkPort READ networkPort WRITE setNetworkPort NOTIFY networkPortChanged) Q_PROPERTY(SuggestionMode suggestionMode READ suggestionMode WRITE setSuggestionMode NOTIFY suggestionModeChanged) Q_PROPERTY(QStringList uiLanguages MEMBER m_uiLanguages CONSTANT) + Q_PROPERTY(QString braveSearchAPIKey READ braveSearchAPIKey WRITE setBraveSearchAPIKey NOTIFY braveSearchAPIKeyChanged) public: static MySettings *globalInstance(); @@ -185,6 +186,10 @@ public: QString localDocsEmbedDevice() const; void setLocalDocsEmbedDevice(const QString &value); + // Tool settings + QString braveSearchAPIKey() const; + void setBraveSearchAPIKey(const QString &value); + // Network settings QString networkAttribution() const; void setNetworkAttribution(const QString &value); @@ -239,6 +244,7 @@ Q_SIGNALS: void deviceChanged(); void suggestionModeChanged(); void languageAndLocaleChanged(); + void braveSearchAPIKeyChanged(); private: QSettings m_settings; diff --git a/gpt4all-chat/qml/ChatView.qml b/gpt4all-chat/qml/ChatView.qml index 920e2759..75a6fc14 100644 --- a/gpt4all-chat/qml/ChatView.qml +++ b/gpt4all-chat/qml/ChatView.qml @@ -881,6 +881,8 @@ Rectangle { case Chat.PromptProcessing: return qsTr("processing ...") case Chat.ResponseGeneration: return qsTr("generating response ..."); case Chat.GeneratingQuestions: return qsTr("generating 
questions ..."); + case Chat.ToolCalled: return currentChat.toolDescription; + case Chat.ToolProcessing: return qsTr("processing web results ..."); // FIXME should not be hardcoded! default: return ""; // handle unexpected values } } @@ -1131,7 +1133,7 @@ Rectangle { sourceSize.width: 24 sourceSize.height: 24 mipmap: true - source: "qrc:/gpt4all/icons/db.svg" + source: consolidatedSources[0].url === "" ? "qrc:/gpt4all/icons/db.svg" : "qrc:/gpt4all/icons/globe.svg" } ColorOverlay { @@ -1243,11 +1245,15 @@ Rectangle { MouseArea { id: ma - enabled: modelData.path !== "" + enabled: modelData.path !== "" || modelData.url !== "" anchors.fill: parent hoverEnabled: true onClicked: function() { - Qt.openUrlExternally(modelData.fileUri) + if (modelData.url !== "") { + console.log("opening url") + Qt.openUrlExternally(modelData.url) + } else + Qt.openUrlExternally(modelData.fileUri) } } @@ -1287,22 +1293,27 @@ Rectangle { Image { id: fileIcon anchors.fill: parent - visible: false + visible: modelData.favicon !== "" sourceSize.width: 24 sourceSize.height: 24 mipmap: true source: { - if (modelData.file.toLowerCase().endsWith(".txt")) + if (modelData.favicon !== "") + return modelData.favicon; + else if (modelData.file.toLowerCase().endsWith(".txt")) return "qrc:/gpt4all/icons/file-txt.svg" else if (modelData.file.toLowerCase().endsWith(".pdf")) return "qrc:/gpt4all/icons/file-pdf.svg" else if (modelData.file.toLowerCase().endsWith(".md")) return "qrc:/gpt4all/icons/file-md.svg" - else + else if (modelData.file !== "") return "qrc:/gpt4all/icons/file.svg" + else + return "qrc:/gpt4all/icons/globe.svg" } } ColorOverlay { + visible: !fileIcon.visible anchors.fill: fileIcon source: fileIcon color: theme.textColor @@ -1310,7 +1321,7 @@ Rectangle { } Text { Layout.maximumWidth: 156 - text: modelData.collection !== "" ? modelData.collection : qsTr("LocalDocs") + text: modelData.collection !== "" ? 
modelData.collection : modelData.title font.pixelSize: theme.fontSizeLarge font.bold: true color: theme.styledTextColor @@ -1326,7 +1337,7 @@ Rectangle { Layout.fillHeight: true Layout.maximumWidth: 180 Layout.maximumHeight: 55 - title.height - text: modelData.file + text: modelData.file !== "" ? modelData.file : modelData.url color: theme.textColor font.pixelSize: theme.fontSizeSmall elide: Qt.ElideRight diff --git a/gpt4all-chat/qml/SettingsView.qml b/gpt4all-chat/qml/SettingsView.qml index 176d0418..d421dad2 100644 --- a/gpt4all-chat/qml/SettingsView.qml +++ b/gpt4all-chat/qml/SettingsView.qml @@ -34,6 +34,9 @@ Rectangle { ListElement { title: qsTr("LocalDocs") } + ListElement { + title: qsTr("Tools") + } } ColumnLayout { @@ -152,6 +155,12 @@ Rectangle { Component { LocalDocsSettings { } } ] } + + MySettingsStack { + tabs: [ + Component { ToolSettings { } } + ] + } } } }
diff --git a/gpt4all-chat/qml/ToolSettings.qml b/gpt4all-chat/qml/ToolSettings.qml
new file mode 100644
index 00000000..2fc1cd32
--- /dev/null
+++ b/gpt4all-chat/qml/ToolSettings.qml
@@ -0,0 +1,78 @@
+import QtCore
+import QtQuick
+import QtQuick.Controls
+import QtQuick.Controls.Basic
+import QtQuick.Layouts
+import QtQuick.Dialogs
+import localdocs
+import modellist
+import mysettings
+import network
+
+// Settings tab for external tools; currently only the Brave web search API key.
+MySettingsTab {
+    // Reset only the settings shown on this tab. Clearing the key assumes an
+    // empty string is its default value.
+    onRestoreDefaultsClicked: {
+        MySettings.braveSearchAPIKey = "";
+    }
+
+    showRestoreDefaultsButton: true
+
+    title: qsTr("Tools")
+    contentItem: ColumnLayout {
+        id: root
+        spacing: 30
+
+        // Section header: title plus a thin divider line.
+        ColumnLayout {
+            spacing: 10
+            Label {
+                color: theme.grayRed900
+                font.pixelSize: theme.fontSizeLarge
+                font.bold: true
+                text: qsTr("Brave Search")
+            }
+
+            Rectangle {
+                Layout.fillWidth: true
+                height: 1
+                color: theme.grayRed500
+            }
+        }
+
+        // API key row: description on the left, editable field on the right.
+        RowLayout {
+            MySettingsLabel {
+                id: apiKeyLabel
+                text: qsTr("Brave AI API key")
+                helpText: qsTr('The API key to use for Brave Web Search. Get one from the Brave for free <a href="https://brave.com/search/api/">API keys page</a>.')
+                // Open the link embedded in the help text with the system URL handler.
+                onLinkActivated: function(link) { Qt.openUrlExternally(link) }
+            }
+
+            MyTextField {
+                id: apiKeyField
+                text: MySettings.braveSearchAPIKey
+                color: theme.textColor
+                font.pixelSize: theme.fontSizeLarge
+                Layout.alignment: Qt.AlignRight
+                Layout.minimumWidth: 200
+                // Write the key back to MySettings once the user finishes editing.
+                onEditingFinished: {
+                    MySettings.braveSearchAPIKey = apiKeyField.text;
+                }
+                Accessible.role: Accessible.EditableText
+                Accessible.name: apiKeyLabel.text
+                Accessible.description: apiKeyLabel.helpText
+            }
+        }
+
+        Rectangle {
+            Layout.topMargin: 15
+            Layout.fillWidth: true
+            height: 1
+            color: theme.settingsDivider
+        }
+    }
+}
diff --git a/gpt4all-chat/server.cpp b/gpt4all-chat/server.cpp index c8485d93..e655bf9f 100644 --- a/gpt4all-chat/server.cpp +++ b/gpt4all-chat/server.cpp @@ -56,27 +56,13 @@ static inline QJsonObject modelToJson(const ModelInfo &info) return model; } -static inline QJsonObject resultToJson(const ResultInfo &info) -{ - QJsonObject result; - result.insert("file", info.file); - result.insert("title", info.title); - result.insert("author", info.author); - result.insert("date", info.date); - result.insert("text", info.text); - result.insert("page", info.page); - result.insert("from", info.from); - result.insert("to", info.to); - return result; -} - Server::Server(Chat *chat) : ChatLLM(chat, true /*isServer*/) , m_chat(chat) , m_server(nullptr) { connect(this, &Server::threadStarted, this, &Server::start); - connect(this, &Server::databaseResultsChanged, this, &Server::handleDatabaseResultsChanged); + connect(this, &Server::sourceExcerptsChanged, this, &Server::handleSourceExcerptsChanged); connect(chat, &Chat::collectionListChanged, this, &Server::handleCollectionListChanged, Qt::QueuedConnection); } @@ -373,7 +359,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re int promptTokens = 0; int responseTokens = 0; - QList>> responses; + QList>> responses; for (int i = 0; i < n; ++i) { if (!promptInternal(
m_collections, @@ -394,7 +380,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re QString echoedPrompt = actualPrompt; if (!echoedPrompt.endsWith("\n")) echoedPrompt += "\n"; - responses.append(qMakePair((echo ? u"%1\n"_s.arg(actualPrompt) : QString()) + response(), m_databaseResults)); + responses.append(qMakePair((echo ? u"%1\n"_s.arg(actualPrompt) : QString()) + response(), m_sourceExcerpts)); if (!promptTokens) promptTokens += m_promptTokens; responseTokens += m_promptResponseTokens - m_promptTokens; @@ -414,7 +400,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re int index = 0; for (const auto &r : responses) { QString result = r.first; - QList infos = r.second; + QList infos = r.second; QJsonObject choice; choice.insert("index", index++); choice.insert("finish_reason", responseTokens == max_tokens ? "length" : "stop"); @@ -425,7 +411,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re if (MySettings::globalInstance()->localDocsShowReferences()) { QJsonArray references; for (const auto &ref : infos) - references.append(resultToJson(ref)); + references.append(ref.toJson()); choice.insert("references", references); } choices.append(choice); @@ -434,7 +420,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re int index = 0; for (const auto &r : responses) { QString result = r.first; - QList infos = r.second; + QList infos = r.second; QJsonObject choice; choice.insert("text", result); choice.insert("index", index++); @@ -443,7 +429,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re if (MySettings::globalInstance()->localDocsShowReferences()) { QJsonArray references; for (const auto &ref : infos) - references.append(resultToJson(ref)); + references.append(ref.toJson()); choice.insert("references", references); } choices.append(choice); diff --git a/gpt4all-chat/server.h b/gpt4all-chat/server.h 
index 689f0b60..0c6f6eb6 100644 --- a/gpt4all-chat/server.h +++ b/gpt4all-chat/server.h @@ -2,7 +2,7 @@ #define SERVER_H #include "chatllm.h" -#include "database.h" +#include "sourceexcerpt.h" #include #include @@ -29,13 +29,13 @@ Q_SIGNALS: private Q_SLOTS: QHttpServerResponse handleCompletionRequest(const QHttpServerRequest &request, bool isChat); - void handleDatabaseResultsChanged(const QList &results) { m_databaseResults = results; } + void handleSourceExcerptsChanged(const QList &sourceExcerpts) { m_sourceExcerpts = sourceExcerpts; } void handleCollectionListChanged(const QList &collectionList) { m_collections = collectionList; } private: Chat *m_chat; QHttpServer *m_server; - QList m_databaseResults; + QList m_sourceExcerpts; QList m_collections; };
diff --git a/gpt4all-chat/sourceexcerpt.h b/gpt4all-chat/sourceexcerpt.h
new file mode 100644
index 00000000..91497e9d
--- /dev/null
+++ b/gpt4all-chat/sourceexcerpt.h
@@ -0,0 +1,107 @@
+#ifndef SOURCEEXCERPT_H
+#define SOURCEEXCERPT_H
+
+#include <QFileInfo>
+#include <QJsonObject>
+#include <QObject>
+#include <QUrl>
+
+using namespace Qt::Literals::StringLiterals;
+
+// One excerpt of source material attached to a response. The same record is
+// used for local documents (collection/path/file) and for remote results
+// (url/favicon); fields that do not apply are left empty or at -1.
+struct SourceExcerpt {
+    Q_GADGET
+    Q_PROPERTY(QString date MEMBER date)
+    Q_PROPERTY(QString text MEMBER text)
+    Q_PROPERTY(QString collection MEMBER collection)
+    Q_PROPERTY(QString path MEMBER path)
+    Q_PROPERTY(QString file MEMBER file)
+    Q_PROPERTY(QString url MEMBER url)
+    Q_PROPERTY(QString favicon MEMBER favicon)
+    Q_PROPERTY(QString title MEMBER title)
+    Q_PROPERTY(QString author MEMBER author)
+    Q_PROPERTY(int page MEMBER page)
+    Q_PROPERTY(int from MEMBER from)
+    Q_PROPERTY(int to MEMBER to)
+    Q_PROPERTY(QString fileUri READ fileUri STORED false)
+
+public:
+    QString date;       // [Required] The creation or the last modification date whichever is latest
+    QString text;       // [Required] The text actually used in the augmented context
+    QString collection; // [Optional] The name of the collection
+    QString path;       // [Optional] The full path
+    QString file;       // [Optional] The name of the file, but not the full path
+    QString url;        // [Optional] The remote URL of the source
+    QString favicon;    // [Optional] The favicon
+    QString title;      // [Optional] The title of the document
+    QString author;     // [Optional] The author of the document
+    int page = -1;      // [Optional] The page where the text was found
+    int from = -1;      // [Optional] The line number where the text begins
+    int to = -1;        // [Optional] The line number where the text ends
+
+    // Returns `path` as a percent-encoded file:// URI, or an empty string when
+    // no local path is set (path is optional, e.g. for remote sources).
+    QString fileUri() const {
+        // Guard first: the assertion and front() below both require a non-empty path.
+        if (path.isEmpty())
+            return QString();
+
+        // QUrl reserved chars that are not UNSAFE_PATH according to glib/gconvert.c
+        static const QByteArray s_exclude = "!$&'()*+,/:=@~"_ba;
+
+        Q_ASSERT(!QFileInfo(path).isRelative());
+#ifdef Q_OS_WINDOWS
+        Q_ASSERT(!path.contains('\\')); // Qt normally uses forward slash as path separator
+#endif
+
+        auto escaped = QString::fromUtf8(QUrl::toPercentEncoding(path, s_exclude));
+        // Absolute paths not beginning with '/' (presumably drive-letter paths) get one prepended.
+        if (escaped.front() != '/')
+            escaped = '/' + escaped;
+        return u"file://"_s + escaped;
+    }
+
+    // Serializes every stored field into a JSON object keyed by field name.
+    QJsonObject toJson() const
+    {
+        QJsonObject result;
+        result.insert("date", date);
+        result.insert("text", text);
+        result.insert("collection", collection);
+        result.insert("path", path);
+        result.insert("file", file);
+        result.insert("url", url);
+        result.insert("favicon", favicon);
+        result.insert("title", title);
+        result.insert("author", author);
+        result.insert("page", page);
+        result.insert("from", from);
+        result.insert("to", to);
+        return result;
+    }
+
+    // Two excerpts are equal only when all stored fields match.
+    bool operator==(const SourceExcerpt &other) const {
+        return date == other.date &&
+               text == other.text &&
+               collection == other.collection &&
+               path == other.path &&
+               file == other.file &&
+               url == other.url &&
+               favicon == other.favicon &&
+               title == other.title &&
+               author == other.author &&
+               page == other.page &&
+               from == other.from &&
+               to == other.to;
+    }
+    bool operator!=(const SourceExcerpt &other) const {
+        return !(*this == other);
+    }
+};
+
+Q_DECLARE_METATYPE(SourceExcerpt)
+
+#endif // SOURCEEXCERPT_H