Serialize the source excerpts from and to pure json

Signed-off-by: Adam Treat <treat.adam@gmail.com>
This commit is contained in:
Adam Treat 2024-08-01 11:18:50 -04:00
parent 01f67c74ea
commit 27b86dae21
6 changed files with 149 additions and 185 deletions

View File

@ -787,10 +787,8 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
QString docsContext;
if (!localDocsExcerpts.isEmpty()) {
// FIXME(adam): we should be using the new tool template if available otherwise this I guess
QStringList results;
for (const SourceExcerpt &info : localDocsExcerpts)
results << u"Collection: %1\nPath: %2\nExcerpt: %3"_s.arg(info.collection, info.path, info.text);
docsContext = u"### Context:\n%1\n\n"_s.arg(results.join("\n\n"));
QString json = SourceExcerpt::toJson(localDocsExcerpts);
docsContext = u"### Context:\n%1\n\n"_s.arg(json);
}
int n_threads = MySettings::globalInstance()->threadCount();
@ -900,9 +898,6 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
emit sourceExcerptsChanged(sourceExcerpts);
}
// Erase the context of the tool call
m_ctx.n_past = std::max(0, m_ctx.n_past);
m_ctx.tokens.erase(m_ctx.tokens.end() - m_promptResponseTokens, m_ctx.tokens.end());
m_promptResponseTokens = 0;
m_promptTokens = 0;
m_response = std::string();

View File

@ -29,7 +29,6 @@ struct ChatItem
Q_PROPERTY(bool thumbsUpState MEMBER thumbsUpState)
Q_PROPERTY(bool thumbsDownState MEMBER thumbsDownState)
Q_PROPERTY(QList<SourceExcerpt> sources MEMBER sources)
Q_PROPERTY(QList<SourceExcerpt> consolidatedSources MEMBER consolidatedSources)
public:
// TODO: Maybe we should include the model name here as well as timestamp?
@ -39,7 +38,6 @@ public:
QString prompt;
QString newResponse;
QList<SourceExcerpt> sources;
QList<SourceExcerpt> consolidatedSources;
bool currentResponse = false;
bool stopped = false;
bool thumbsUpState = false;
@ -65,8 +63,7 @@ public:
StoppedRole,
ThumbsUpStateRole,
ThumbsDownStateRole,
SourcesRole,
ConsolidatedSourcesRole
SourcesRole
};
int rowCount(const QModelIndex &parent = QModelIndex()) const override
@ -102,8 +99,6 @@ public:
return item.thumbsDownState;
case SourcesRole:
return QVariant::fromValue(item.sources);
case ConsolidatedSourcesRole:
return QVariant::fromValue(item.consolidatedSources);
}
return QVariant();
@ -122,7 +117,6 @@ public:
roles[ThumbsUpStateRole] = "thumbsUpState";
roles[ThumbsDownStateRole] = "thumbsDownState";
roles[SourcesRole] = "sources";
roles[ConsolidatedSourcesRole] = "consolidatedSources";
return roles;
}
@ -200,20 +194,6 @@ public:
}
}
// Merges excerpts that come from the same origin into a single entry.
// The origin key is the file name when one is set, otherwise the url.
// The first excerpt seen for a key is kept as-is; the text of every later
// excerpt with that key is appended to it after a "\n---\n" separator.
// The result is whatever QMap::values() yields for the grouped entries.
QList<SourceExcerpt> consolidateSources(const QList<SourceExcerpt> &sources) {
    QMap<QString, SourceExcerpt> byOrigin;
    for (const SourceExcerpt &entry : sources) {
        const QString origin = entry.file.isEmpty() ? entry.url : entry.file;
        auto existing = byOrigin.find(origin);
        if (existing == byOrigin.end())
            byOrigin.insert(origin, entry);
        else
            existing->text += "\n---\n" + entry.text;
    }
    return byOrigin.values();
}
Q_INVOKABLE void updateSources(int index, const QList<SourceExcerpt> &sources)
{
if (index < 0 || index >= m_chatItems.size()) return;
@ -221,13 +201,10 @@ public:
ChatItem &item = m_chatItems[index];
if (sources.isEmpty()) {
item.sources.clear();
item.consolidatedSources.clear();
} else {
item.sources << sources;
item.consolidatedSources << consolidateSources(sources);
}
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {SourcesRole});
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {ConsolidatedSourcesRole});
}
Q_INVOKABLE void updateThumbsUpState(int index, bool b)
@ -278,61 +255,7 @@ public:
stream << c.stopped;
stream << c.thumbsUpState;
stream << c.thumbsDownState;
if (version > 7) {
stream << c.sources.size();
for (const SourceExcerpt &info : c.sources) {
Q_ASSERT(!info.file.isEmpty());
stream << info.collection;
stream << info.path;
stream << info.file;
stream << info.title;
stream << info.author;
stream << info.date;
stream << info.text;
stream << info.page;
stream << info.from;
stream << info.to;
if (version > 9) {
stream << info.url;
stream << info.favicon;
}
}
} else if (version > 2) {
QList<QString> references;
QList<QString> referencesContext;
int validReferenceNumber = 1;
for (const SourceExcerpt &info : c.sources) {
if (info.file.isEmpty())
continue;
QString reference;
{
QTextStream stream(&reference);
stream << (validReferenceNumber++) << ". ";
if (!info.title.isEmpty())
stream << "\"" << info.title << "\". ";
if (!info.author.isEmpty())
stream << "By " << info.author << ". ";
if (!info.date.isEmpty())
stream << "Date: " << info.date << ". ";
stream << "In " << info.file << ". ";
if (info.page != -1)
stream << "Page " << info.page << ". ";
if (info.from != -1) {
stream << "Lines " << info.from;
if (info.to != -1)
stream << "-" << info.to;
stream << ". ";
}
stream << "[Context](context://" << validReferenceNumber - 1 << ")";
}
references.append(reference);
referencesContext.append(info.text);
}
stream << references.join("\n");
stream << referencesContext;
}
stream << SourceExcerpt::toJson(c.sources);
}
return stream.status() == QDataStream::Ok;
}
@ -352,31 +275,36 @@ public:
stream >> c.stopped;
stream >> c.thumbsUpState;
stream >> c.thumbsDownState;
if (version > 7) {
if (version > 9) {
QList<SourceExcerpt> sources;
QString json;
stream >> json;
QString errorString;
sources = SourceExcerpt::fromJson(json, errorString);
Q_ASSERT(errorString.isEmpty());
c.sources = sources;
} else if (version > 7) {
qsizetype count;
stream >> count;
QList<SourceExcerpt> sources;
for (int i = 0; i < count; ++i) {
SourceExcerpt info;
stream >> info.collection;
stream >> info.path;
stream >> info.file;
stream >> info.title;
stream >> info.author;
stream >> info.date;
stream >> info.text;
stream >> info.page;
stream >> info.from;
stream >> info.to;
if (version > 9) {
stream >> info.url;
stream >> info.favicon;
}
sources.append(info);
SourceExcerpt source;
stream >> source.collection;
stream >> source.path;
stream >> source.file;
stream >> source.title;
stream >> source.author;
stream >> source.date;
Excerpt excerpt;
stream >> excerpt.text;
stream >> excerpt.page;
stream >> excerpt.from;
stream >> excerpt.to;
source.excerpts = QList{ excerpt };
sources.append(source);
}
c.sources = sources;
c.consolidatedSources = consolidateSources(sources);
}else if (version > 2) {
} else if (version > 2) {
QString references;
QList<QString> referencesContext;
stream >> references;
@ -398,7 +326,8 @@ public:
for (int j = 0; j < referenceList.size(); ++j) {
QString reference = referenceList[j];
QString context = referencesContext[j];
SourceExcerpt info;
SourceExcerpt source;
Excerpt excerpt;
QTextStream refStream(&reference);
QString dummy;
int validReferenceNumber;
@ -407,28 +336,28 @@ public:
if (reference.contains("\"")) {
int startIndex = reference.indexOf('"') + 1;
int endIndex = reference.indexOf('"', startIndex);
info.title = reference.mid(startIndex, endIndex - startIndex);
source.title = reference.mid(startIndex, endIndex - startIndex);
}
// Extract author (after "By " and before the next period)
if (reference.contains("By ")) {
int startIndex = reference.indexOf("By ") + 3;
int endIndex = reference.indexOf('.', startIndex);
info.author = reference.mid(startIndex, endIndex - startIndex).trimmed();
source.author = reference.mid(startIndex, endIndex - startIndex).trimmed();
}
// Extract date (after "Date: " and before the next period)
if (reference.contains("Date: ")) {
int startIndex = reference.indexOf("Date: ") + 6;
int endIndex = reference.indexOf('.', startIndex);
info.date = reference.mid(startIndex, endIndex - startIndex).trimmed();
source.date = reference.mid(startIndex, endIndex - startIndex).trimmed();
}
// Extract file name (after "In " and before the "[Context]")
if (reference.contains("In ") && reference.contains(". [Context]")) {
int startIndex = reference.indexOf("In ") + 3;
int endIndex = reference.indexOf(". [Context]", startIndex);
info.file = reference.mid(startIndex, endIndex - startIndex).trimmed();
source.file = reference.mid(startIndex, endIndex - startIndex).trimmed();
}
// Extract page number (after "Page " and before the next space)
@ -436,7 +365,7 @@ public:
int startIndex = reference.indexOf("Page ") + 5;
int endIndex = reference.indexOf(' ', startIndex);
if (endIndex == -1) endIndex = reference.length();
info.page = reference.mid(startIndex, endIndex - startIndex).toInt();
excerpt.page = reference.mid(startIndex, endIndex - startIndex).toInt();
}
// Extract lines (after "Lines " and before the next space or hyphen)
@ -446,18 +375,18 @@ public:
if (endIndex == -1) endIndex = reference.length();
int hyphenIndex = reference.indexOf('-', startIndex);
if (hyphenIndex != -1 && hyphenIndex < endIndex) {
info.from = reference.mid(startIndex, hyphenIndex - startIndex).toInt();
info.to = reference.mid(hyphenIndex + 1, endIndex - hyphenIndex - 1).toInt();
excerpt.from = reference.mid(startIndex, hyphenIndex - startIndex).toInt();
excerpt.to = reference.mid(hyphenIndex + 1, endIndex - hyphenIndex - 1).toInt();
} else {
info.from = reference.mid(startIndex, endIndex - startIndex).toInt();
excerpt.from = reference.mid(startIndex, endIndex - startIndex).toInt();
}
}
info.text = context;
sources.append(info);
excerpt.text = context;
source.excerpts = QList{ excerpt };
sources.append(source);
}
c.sources = sources;
c.consolidatedSources = consolidateSources(sources);
}
}
beginInsertRows(QModelIndex(), m_chatItems.size(), m_chatItems.size());

View File

@ -1106,7 +1106,7 @@ Rectangle {
Layout.preferredWidth: childrenRect.width
Layout.preferredHeight: childrenRect.height
visible: {
if (consolidatedSources.length === 0)
if (sources.length === 0)
return false
if (!MySettings.localDocsShowReferences)
return false
@ -1134,9 +1134,9 @@ Rectangle {
sourceSize.height: 24
mipmap: true
source: {
if (typeof consolidatedSources === 'undefined'
|| typeof consolidatedSources[0] === 'undefined'
|| consolidatedSources[0].url === "")
if (typeof sources === 'undefined'
|| typeof sources[0] === 'undefined'
|| sources[0].url === "")
return "qrc:/gpt4all/icons/db.svg";
else
return "qrc:/gpt4all/icons/globe.svg";
@ -1151,7 +1151,7 @@ Rectangle {
}
Text {
text: qsTr("%1 Sources").arg(consolidatedSources.length)
text: qsTr("%1 Sources").arg(sources.length)
padding: 0
font.pixelSize: theme.fontSizeLarge
font.bold: true
@ -1199,7 +1199,7 @@ Rectangle {
Layout.column: 1
Layout.topMargin: 5
visible: {
if (consolidatedSources.length === 0)
if (sources.length === 0)
return false
if (!MySettings.localDocsShowReferences)
return false
@ -1240,9 +1240,9 @@ Rectangle {
id: flow
Layout.fillWidth: true
spacing: 10
visible: consolidatedSources.length !== 0
visible: sources.length !== 0
Repeater {
model: consolidatedSources
model: sources
delegate: Rectangle {
radius: 10
@ -1361,7 +1361,7 @@ Rectangle {
return false;
if (MySettings.suggestionMode === 2) // Off
return false;
if (MySettings.suggestionMode === 0 && consolidatedSources.length === 0) // LocalDocs only
if (MySettings.suggestionMode === 0 && sources.length === 0) // LocalDocs only
return false;
return currentChat.responseState === Chat.GeneratingQuestions || currentChat.generatedQuestions.length !== 0;
}

View File

@ -408,12 +408,8 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
message.insert("role", "assistant");
message.insert("content", result);
choice.insert("message", message);
if (MySettings::globalInstance()->localDocsShowReferences()) {
QJsonArray references;
for (const auto &ref : infos)
references.append(ref.toJson());
choice.insert("references", references);
}
if (MySettings::globalInstance()->localDocsShowReferences())
choice.insert("references", SourceExcerpt::toJson(infos));
choices.append(choice);
}
} else {
@ -426,12 +422,8 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
choice.insert("index", index++);
choice.insert("logprobs", QJsonValue::Null); // We don't support
choice.insert("finish_reason", responseTokens == max_tokens ? "length" : "stop");
if (MySettings::globalInstance()->localDocsShowReferences()) {
QJsonArray references;
for (const auto &ref : infos)
references.append(ref.toJson());
choice.insert("references", references);
}
if (MySettings::globalInstance()->localDocsShowReferences())
choice.insert("references", SourceExcerpt::toJson(infos));
choices.append(choice);
}
}

View File

@ -5,8 +5,53 @@
#include <QJsonObject>
#include <QJsonValue>
// Serializes a list of sources into one compact JSON document shaped as
//   {"results":[{<source fields>, "excerpts":[{"text":..., ...}, ...]}, ...]}
// An empty list produces an empty string instead of an empty document.
QString SourceExcerpt::toJson(const QList<SourceExcerpt> &sources)
{
    if (sources.isEmpty())
        return QString();

    QJsonArray results;
    for (const SourceExcerpt &src : sources) {
        // Build the nested excerpt array first so the source object can be
        // filled in one pass afterwards.
        QJsonArray excerptList;
        for (const auto &ex : src.excerpts) {
            QJsonObject entry;
            entry.insert("text", ex.text);
            // page/from/to default to -1 when unset; unset values are left
            // out of the JSON entirely rather than written as -1.
            if (ex.page != -1)
                entry.insert("page", ex.page);
            if (ex.from != -1)
                entry.insert("from", ex.from);
            if (ex.to != -1)
                entry.insert("to", ex.to);
            excerptList.append(entry);
        }

        QJsonObject obj;
        obj.insert("date", src.date);
        obj.insert("collection", src.collection);
        obj.insert("path", src.path);
        obj.insert("file", src.file);
        obj.insert("url", src.url);
        obj.insert("favicon", src.favicon);
        obj.insert("title", src.title);
        obj.insert("author", src.author);
        obj.insert("description", src.description);
        obj.insert("excerpts", excerptList);
        results.append(obj);
    }

    QJsonObject root;
    root.insert("results", results);
    return QJsonDocument(root).toJson(QJsonDocument::Compact);
}
QList<SourceExcerpt> SourceExcerpt::fromJson(const QString &json, QString &errorString)
{
if (json.isEmpty())
return QList<SourceExcerpt>();
QJsonParseError err;
QJsonDocument document = QJsonDocument::fromJson(json.toUtf8(), &err);
if (err.error != QJsonParseError::NoError) {
@ -44,7 +89,7 @@ QList<SourceExcerpt> SourceExcerpt::fromJson(const QString &json, QString &error
SourceExcerpt source;
source.date = result["date"].toString();
if (result.contains("collection"))
source.collection = result["text"].toString();
source.collection = result["collection"].toString();
if (result.contains("path"))
source.path = result["path"].toString();
if (result.contains("file"))
@ -61,15 +106,6 @@ QList<SourceExcerpt> SourceExcerpt::fromJson(const QString &json, QString &error
// The "description" value belongs in its own field; writing it to author
// would clobber any author value and leave description empty.
source.description = result["description"].toString();
for (int i = 0; i < textExcerpts.size(); ++i) {
SourceExcerpt excerpt;
excerpt.date = source.date;
excerpt.collection = source.collection;
excerpt.path = source.path;
excerpt.file = source.file;
excerpt.url = source.url;
excerpt.favicon = source.favicon;
excerpt.title = source.title;
excerpt.author = source.author;
if (!textExcerpts[i].isObject()) {
errorString = "result excerpt is not an object";
return QList<SourceExcerpt>();
@ -79,6 +115,7 @@ QList<SourceExcerpt> SourceExcerpt::fromJson(const QString &json, QString &error
errorString = "result excerpt is does not have text field";
return QList<SourceExcerpt>();
}
Excerpt excerpt;
excerpt.text = excerptObj["text"].toString();
if (excerptObj.contains("page"))
excerpt.page = excerptObj["page"].toInt();
@ -86,8 +123,9 @@ QList<SourceExcerpt> SourceExcerpt::fromJson(const QString &json, QString &error
excerpt.from = excerptObj["from"].toInt();
if (excerptObj.contains("to"))
excerpt.to = excerptObj["to"].toInt();
excerpts.append(excerpt);
source.excerpts.append(excerpt);
}
excerpts.append(source);
}
return excerpts;
}

View File

@ -8,10 +8,23 @@
using namespace Qt::Literals::StringLiterals;
// A single passage of text taken from a source, plus optional location
// information. page/from/to stay at -1 when that information is not set.
struct Excerpt {
    QString text;   // [Required] The text actually used in the augmented context
    int page = -1;  // [Optional] The page where the text was found
    int from = -1;  // [Optional] The line number where the text begins
    int to = -1;    // [Optional] The line number where the text ends

    // Two excerpts are equal when all four fields match.
    bool operator==(const Excerpt &rhs) const {
        // Compare the integers first; the string comparison only runs when
        // the location fields already agree.
        if (page != rhs.page || from != rhs.from || to != rhs.to)
            return false;
        return text == rhs.text;
    }

    bool operator!=(const Excerpt &rhs) const {
        return !operator==(rhs);
    }
};
Q_DECLARE_METATYPE(Excerpt)
struct SourceExcerpt {
Q_GADGET
Q_PROPERTY(QString date MEMBER date)
Q_PROPERTY(QString text MEMBER text)
Q_PROPERTY(QString collection MEMBER collection)
Q_PROPERTY(QString path MEMBER path)
Q_PROPERTY(QString file MEMBER file)
@ -20,25 +33,40 @@ struct SourceExcerpt {
Q_PROPERTY(QString title MEMBER title)
Q_PROPERTY(QString author MEMBER author)
Q_PROPERTY(QString description MEMBER description)
Q_PROPERTY(int page MEMBER page)
Q_PROPERTY(int from MEMBER from)
Q_PROPERTY(int to MEMBER to)
Q_PROPERTY(QString fileUri READ fileUri STORED false)
Q_PROPERTY(QString text READ text STORED false)
Q_PROPERTY(QList<Excerpt> excerpts MEMBER excerpts)
public:
QString date; // [Required] The creation or the last modification date whichever is latest
QString text; // [Required] The text actually used in the augmented context
QString collection; // [Optional] The name of the collection
QString path; // [Optional] The full path
QString file; // [Optional] The name of the file, but not the full path
QString url; // [Optional] The name of the remote url
QString favicon; // [Optional] The favicon
QString title; // [Optional] The title of the document
QString author; // [Optional] The author of the document
QString description;// [Optional] The description of the source
int page = -1; // [Optional] The page where the text was found
int from = -1; // [Optional] The line number where the text begins
int to = -1; // [Optional] The line number where the text ends
QString date; // [Required] The creation or the last modification date whichever is latest
QString collection; // [Optional] The name of the collection
QString path; // [Optional] The full path
QString file; // [Optional] The name of the file, but not the full path
QString url; // [Optional] The name of the remote url
QString favicon; // [Optional] The favicon
QString title; // [Optional] The title of the document
QString author; // [Optional] The author of the document
QString description; // [Optional] The description of the source
QList<Excerpt> excerpts;// [Required] The list of excerpts
// Builds one human readable string out of every excerpt of this source.
// Each excerpt contributes its text, followed by a parenthesised location
// suffix when a page and/or a complete from-to line range is set (-1 means
// unset). Excerpts are separated by a "\n---\n" line.
QString text() const {
    QStringList parts;
    parts.reserve(excerpts.size());
    for (const auto &entry : excerpts) {
        const bool hasPage = entry.page != -1;
        // A line range is only shown when both ends are known
        const bool hasLines = entry.from != -1 && entry.to != -1;

        QString line = entry.text;
        if (hasPage) {
            line += QStringLiteral(" (Page: %1").arg(entry.page);
            if (hasLines)
                line += QStringLiteral(", Lines: %1-%2").arg(entry.from).arg(entry.to);
            line += QStringLiteral(")");
        } else if (hasLines) {
            line += QStringLiteral(" (Lines: %1-%2)").arg(entry.from).arg(entry.to);
        }
        parts.append(line);
    }
    return parts.join(QStringLiteral("\n---\n"));
}
QString fileUri() const {
// QUrl reserved chars that are not UNSAFE_PATH according to glib/gconvert.c
@ -55,25 +83,7 @@ public:
return u"file://"_s + escaped;
}
// Converts this single excerpt into a flat JSON object with one key per
// field. page/from/to are always written, including when they hold -1.
QJsonObject toJson() const
{
    QJsonObject obj;
    // Descriptive string fields
    obj["date"] = date;
    obj["text"] = text;
    obj["collection"] = collection;
    obj["path"] = path;
    obj["file"] = file;
    obj["url"] = url;
    obj["favicon"] = favicon;
    obj["title"] = title;
    obj["author"] = author;
    obj["description"] = description;
    // Location of the text inside the source
    obj["page"] = page;
    obj["from"] = from;
    obj["to"] = to;
    return obj;
}
static QString toJson(const QList<SourceExcerpt> &sources);
static QList<SourceExcerpt> fromJson(const QString &json, QString &errorString);
bool operator==(const SourceExcerpt &other) const {