Begin implementing the localdocs ui in earnest.

This commit is contained in:
Adam Treat 2023-05-22 22:13:42 -04:00 committed by AT
parent 120fbbf67d
commit d555ed3b07
11 changed files with 1521 additions and 1114 deletions

View File

@ -62,9 +62,10 @@ qt_add_executable(chat
chatllm.h chatllm.cpp chatllm.h chatllm.cpp
chatmodel.h chatlistmodel.h chatlistmodel.cpp chatmodel.h chatlistmodel.h chatlistmodel.cpp
chatgpt.h chatgpt.cpp chatgpt.h chatgpt.cpp
database.h database.cpp
download.h download.cpp download.h download.cpp
network.h network.cpp network.h network.cpp
localdocs.h localdocs.cpp localdocs.h localdocs.cpp localdocsmodel.h localdocsmodel.cpp
llm.h llm.cpp llm.h llm.cpp
server.h server.cpp server.h server.cpp
sysinfo.h sysinfo.h
@ -77,6 +78,7 @@ qt_add_qml_module(chat
main.qml main.qml
qml/ChatDrawer.qml qml/ChatDrawer.qml
qml/LocalDocs.qml qml/LocalDocs.qml
qml/AddCollectionDialog.qml
qml/ModelDownloaderDialog.qml qml/ModelDownloaderDialog.qml
qml/NetworkDialog.qml qml/NetworkDialog.qml
qml/NewVersionDialog.qml qml/NewVersionDialog.qml

988
gpt4all-chat/database.cpp Normal file
View File

@ -0,0 +1,988 @@
#include "database.h"
#include "download.h"
#include <QTimer>
#include <QPdfDocument>
//#define DEBUG
//#define DEBUG_EXAMPLE
#define LOCALDOCS_VERSION 0
// Inserts one row into the plain `chunks` table; takes five positional
// parameters in the order listed in the column list.
const auto INSERT_CHUNK_SQL = QLatin1String(R"(
insert into chunks(document_id, chunk_id, chunk_text,
embedding_id, embedding_path) values(?, ?, ?, ?, ?);
)");
// Inserts the same five values into the `chunks_fts` full-text table.
const auto INSERT_CHUNK_FTS_SQL = QLatin1String(R"(
insert into chunks_fts(document_id, chunk_id, chunk_text,
embedding_id, embedding_path) values(?, ?, ?, ?, ?);
)");
// Deletes every row of `chunks` belonging to one document id.
const auto DELETE_CHUNKS_SQL = QLatin1String(R"(
delete from chunks WHERE document_id = ?;
)");
// Deletes every row of `chunks_fts` belonging to one document id.
const auto DELETE_CHUNKS_FTS_SQL = QLatin1String(R"(
delete from chunks_fts WHERE document_id = ?;
)");
// Schema of the plain `chunks` table.
const auto CHUNKS_SQL = QLatin1String(R"(
create table chunks(document_id integer, chunk_id integer, chunk_text varchar,
embedding_id integer, embedding_path varchar);
)");
// Schema of the FTS5 virtual table mirroring `chunks`. Only chunk_text is
// indexed (all other columns are declared `unindexed`), using the trigram
// tokenizer.
const auto FTS_CHUNKS_SQL = QLatin1String(R"(
create virtual table chunks_fts using fts5(document_id unindexed, chunk_id unindexed, chunk_text,
embedding_id unindexed, embedding_path unindexed, tokenize="trigram");
)");
// Full-text search over chunks_fts restricted to a set of collections.
// The single `?` is the FTS5 match expression; `%1` is substituted by the
// caller with the contents of the `in (...)` list.
// Result columns: 0 = chunks_fts.rowid, 1 = document_id, 2 = chunk_text.
// FTS5's bm25() returns numerically smaller values for better matches, so
// the default ascending order is what puts the three best hits first.
const auto SELECT_SQL = QLatin1String(R"(
select chunks_fts.rowid, chunks_fts.document_id, chunks_fts.chunk_text
from chunks_fts
join documents ON chunks_fts.document_id = documents.id
join folders ON documents.folder_id = folders.id
join collections ON folders.id = collections.folder_id
where chunks_fts match ? and collections.collection_name in (%1)
order by bm25(chunks_fts)
limit 3;
)");
// Stores one chunk of a document in both the plain `chunks` table and the
// `chunks_fts` full-text table, in that order.
// Returns false as soon as a prepare or exec on `q` fails; the second insert
// is not attempted if the first one failed.
bool addChunk(QSqlQuery &q, int document_id, int chunk_id, const QString &chunk_text, int embedding_id,
    const QString &embedding_path)
{
    // Both statements take the same five positional values.
    const auto insertUsing = [&](const auto &sql) {
        if (!q.prepare(sql))
            return false;
        q.addBindValue(document_id);
        q.addBindValue(chunk_id);
        q.addBindValue(chunk_text);
        q.addBindValue(embedding_id);
        q.addBindValue(embedding_path);
        return q.exec();
    };

    return insertUsing(INSERT_CHUNK_SQL) && insertUsing(INSERT_CHUNK_FTS_SQL);
}
// Deletes all chunks of `document_id` from the plain `chunks` table and then
// from the `chunks_fts` table.
// Returns false as soon as a prepare or exec on `q` fails.
bool removeChunksByDocumentId(QSqlQuery &q, int document_id)
{
    const auto deleteUsing = [&](const auto &sql) {
        if (!q.prepare(sql))
            return false;
        q.addBindValue(document_id);
        return q.exec();
    };

    return deleteUsing(DELETE_CHUNKS_SQL) && deleteUsing(DELETE_CHUNKS_FTS_SQL);
}
// Builds FTS5 `NEAR(...)` phrases from every run of N consecutive words in
// `input`.
//
// Common English punctuation is stripped first, then the text is split on
// whitespace. N is clamped to the number of words. Each returned entry has
// the form `NEAR("w1" "w2" ... "wN", N)`.
//
// Returns an empty list when there are no words or N is not positive;
// previously that case produced the malformed expression `NEAR(, 0)`.
QStringList generateGrams(const QString &input, int N)
{
    // Remove common English punctuation using QRegularExpression
    static const QRegularExpression punctuation(R"([.,;:!?'"()\-])");
    static const QRegularExpression whitespace("\\s+");

    QString cleanedInput = input;
    cleanedInput.remove(punctuation);

    // Split the cleaned input into words using whitespace
    const QStringList words = cleanedInput.split(whitespace, Qt::SkipEmptyParts);
    N = qMin(words.size(), N);
    if (N <= 0)
        return QStringList();

    // Generate all possible N-grams
    QStringList ngrams;
    for (int i = 0; i < words.size() - (N - 1); ++i) {
        QStringList currentNgram;
        for (int j = 0; j < N; ++j)
            currentNgram.append("\"" + words[i + j] + "\"");
        ngrams.append("NEAR(" + currentNgram.join(" ") + ", " + QString::number(N) + ")");
    }
    return ngrams;
}
// Runs the full-text search for `chunk_text` restricted to `collection_names`.
//
// The search starts with N-grams as long as the whole text and shortens them
// one word at a time down to trigrams, stopping at the first N that yields a
// row. On a hit, `q` is left positioned before the first result so the caller
// can iterate with q.next().
//
// Returns false if a prepare or exec fails, true otherwise (including when
// nothing matched or the text has fewer than three words, in which case no
// query is run).
//
// Collection names are bound as parameters rather than pasted into the SQL
// text, so names containing quotes can neither break nor alter the statement.
bool selectChunk(QSqlQuery &q, const QList<QString> &collection_names, const QString &chunk_text)
{
    // One positional placeholder per collection; `NULL` keeps the IN list
    // syntactically valid (and matching nothing) when no collection is given.
    QStringList placeholders;
    for (int i = 0; i < collection_names.size(); ++i)
        placeholders.append(QStringLiteral("?"));
    if (placeholders.isEmpty())
        placeholders.append(QStringLiteral("NULL"));
    const QString formatted_query = SELECT_SQL.arg(placeholders.join(QStringLiteral(", ")));

    const int N_WORDS = chunk_text.split(QRegularExpression("\\s+")).size();
    for (int N = N_WORDS; N > 2; N--) {
        const QList<QString> text = generateGrams(chunk_text, N);
        if (text.isEmpty())
            continue; // nothing searchable at this length
        const QString orText = text.join(" OR ");

        if (!q.prepare(formatted_query))
            return false;
        // Bind order follows placeholder order in SELECT_SQL: the match
        // expression first, then the collection names.
        q.addBindValue(orText);
        for (const QString &name : collection_names)
            q.addBindValue(name);
        if (!q.exec())
            return false;

        if (q.next()) {
#if defined(DEBUG)
            qDebug() << "hit on" << N << "before" << chunk_text << "after" << orText;
#endif
            q.previous();
            return true;
        }
    }
    return true;
}
// Debug helper: dumps every remaining row of `q` to qDebug().
// Expects the column layout produced by SELECT_SQL:
// 0 = rowid, 1 = document_id, 2 = chunk_text.
// Column 1 used to be printed under the label "collection_name" although the
// query selects chunks_fts.document_id there.
void printResults(QSqlQuery &q)
{
    while (q.next()) {
        const int rowid = q.value(0).toInt();
        const int document_id = q.value(1).toInt();
        const QString chunk_text = q.value(2).toString();
        qDebug() << "rowid:" << rowid
                 << "document_id:" << document_id
                 << "chunk_text:" << chunk_text;
    }
}
// Links a folder to a collection; parameters: collection_name, folder_id.
const auto INSERT_COLLECTION_SQL = QLatin1String(R"(
insert into collections(collection_name, folder_id) values(?, ?);
)");
// Removes one (collection_name, folder_id) link.
const auto DELETE_COLLECTION_SQL = QLatin1String(R"(
delete from collections where collection_name = ? and folder_id = ?;
)");
// Schema of the `collections` table; each (collection_name, folder_id) pair
// is unique.
const auto COLLECTIONS_SQL = QLatin1String(R"(
create table collections(collection_name varchar, folder_id integer, unique(collection_name, folder_id));
)");
// Lists the folder ids linked to one collection.
const auto SELECT_FOLDERS_FROM_COLLECTIONS_SQL = QLatin1String(R"(
select folder_id from collections where collection_name = ?;
)");
// Lists the collection names linked to one folder id.
const auto SELECT_COLLECTIONS_FROM_FOLDER_SQL = QLatin1String(R"(
select collection_name from collections where folder_id = ?;
)");
// Lists every collection/folder link, ordered by collection name and then
// folder path. Result columns: 0 = collection_name, 1 = folder_path,
// 2 = folder id.
const auto SELECT_COLLECTIONS_SQL = QLatin1String(R"(
select c.collection_name, f.folder_path, f.id
from collections c
join folders f on c.folder_id = f.id
order by c.collection_name asc, f.folder_path asc;
)");
// Inserts a (collection_name, folder_id) row into the collections table.
// Returns false if the statement cannot be prepared or executed.
bool addCollection(QSqlQuery &q, const QString &collection_name, int folder_id)
{
    const bool prepared = q.prepare(INSERT_COLLECTION_SQL);
    if (prepared) {
        q.addBindValue(collection_name);
        q.addBindValue(folder_id);
    }
    return prepared && q.exec();
}
// Deletes the (collection_name, folder_id) row from the collections table.
// Returns false if the statement cannot be prepared or executed.
bool removeCollection(QSqlQuery &q, const QString &collection_name, int folder_id)
{
    const bool prepared = q.prepare(DELETE_COLLECTION_SQL);
    if (prepared) {
        q.addBindValue(collection_name);
        q.addBindValue(folder_id);
    }
    return prepared && q.exec();
}
// Appends to `folderIds` the id of every folder linked to `collection_name`.
// Returns false (leaving `folderIds` untouched) if the query cannot be
// prepared or executed.
bool selectFoldersFromCollection(QSqlQuery &q, const QString &collection_name, QList<int> *folderIds) {
    if (!q.prepare(SELECT_FOLDERS_FROM_COLLECTIONS_SQL))
        return false;
    q.addBindValue(collection_name);
    if (q.exec()) {
        while (q.next()) {
            const int folderId = q.value(0).toInt();
            folderIds->append(folderId);
        }
        return true;
    }
    return false;
}
// Appends to `collections` the name of every collection linked to `folder_id`.
// Returns false (leaving `collections` untouched) if the query cannot be
// prepared or executed.
bool selectCollectionsFromFolder(QSqlQuery &q, int folder_id, QList<QString> *collections) {
    if (!q.prepare(SELECT_COLLECTIONS_FROM_FOLDER_SQL))
        return false;
    q.addBindValue(folder_id);
    if (q.exec()) {
        while (q.next()) {
            const QString name = q.value(0).toString();
            collections->append(name);
        }
        return true;
    }
    return false;
}
// Appends one CollectionItem per collection/folder link to `collections`.
// Returns false if the query cannot be prepared or executed.
//
// SELECT_COLLECTIONS_SQL yields: 0 = collection_name, 1 = folder_path,
// 2 = folder id. The folder id used to be read from column 0 (the collection
// name), which produced a bogus id for every item.
bool selectAllFromCollections(QSqlQuery &q, QList<CollectionItem> *collections) {
    if (!q.prepare(SELECT_COLLECTIONS_SQL))
        return false;
    if (!q.exec())
        return false;
    while (q.next()) {
        CollectionItem i;
        i.collection = q.value(0).toString();
        i.folder_path = q.value(1).toString();
        i.folder_id = q.value(2).toInt();
        collections->append(i);
    }
    return true;
}
// Registers a folder path; the id column is filled in by the database.
const auto INSERT_FOLDERS_SQL = QLatin1String(R"(
insert into folders(folder_path) values(?);
)");
// Removes a folder row by id.
const auto DELETE_FOLDERS_SQL = QLatin1String(R"(
delete from folders where id = ?;
)");
// Looks up a folder id by its path.
const auto SELECT_FOLDERS_FROM_PATH_SQL = QLatin1String(R"(
select id from folders where folder_path = ?;
)");
// Looks up a folder path by its id.
const auto SELECT_FOLDERS_FROM_ID_SQL = QLatin1String(R"(
select folder_path from folders where id = ?;
)");
// Lists every registered folder path.
const auto SELECT_ALL_FOLDERPATHS_SQL = QLatin1String(R"(
select folder_path from folders;
)");
// Schema of the `folders` table; folder_path is unique.
const auto FOLDERS_SQL = QLatin1String(R"(
create table folders(id integer primary key, folder_path varchar unique);
)");
// Inserts `folder_path` into the folders table and stores the id of the new
// row in `*folder_id`.
// Returns false, leaving `*folder_id` untouched, if prepare or exec fails.
bool addFolderToDB(QSqlQuery &q, const QString &folder_path, int *folder_id)
{
    if (!q.prepare(INSERT_FOLDERS_SQL))
        return false;
    q.addBindValue(folder_path);
    const bool inserted = q.exec();
    if (inserted)
        *folder_id = q.lastInsertId().toInt();
    return inserted;
}
// Deletes the folders row with the given id.
// Returns false if the statement cannot be prepared or executed.
bool removeFolderFromDB(QSqlQuery &q, int folder_id) {
    const bool prepared = q.prepare(DELETE_FOLDERS_SQL);
    if (prepared)
        q.addBindValue(folder_id);
    return prepared && q.exec();
}
// Looks up the id of the folder registered under `folder_path`.
// `*id` is written only when a row is found, so callers can pre-set it to a
// sentinel to detect "not found".
// Returns false only if the query cannot be prepared or executed.
bool selectFolder(QSqlQuery &q, const QString &folder_path, int *id) {
    if (!q.prepare(SELECT_FOLDERS_FROM_PATH_SQL))
        return false;
    q.addBindValue(folder_path);
    if (!q.exec())
        return false;
    Q_ASSERT(q.size() < 2);
    const bool found = q.next();
    if (found)
        *id = q.value(0).toInt();
    return true;
}
// Looks up the path of the folder registered under `id`.
// `*folder_path` is written only when a row is found.
// Returns false only if the query cannot be prepared or executed.
bool selectFolder(QSqlQuery &q, int id, QString *folder_path) {
    if (!q.prepare(SELECT_FOLDERS_FROM_ID_SQL))
        return false;
    q.addBindValue(id);
    if (!q.exec())
        return false;
    Q_ASSERT(q.size() < 2);
    const bool found = q.next();
    if (found)
        *folder_path = q.value(0).toString();
    return true;
}
// Appends every folder path stored in the folders table to `folder_paths`.
// Returns false if the query cannot be prepared or executed.
bool selectAllFolderPaths(QSqlQuery &q, QList<QString> *folder_paths) {
    const bool ran = q.prepare(SELECT_ALL_FOLDERPATHS_SQL) && q.exec();
    if (!ran)
        return false;
    while (q.next()) {
        const QString path = q.value(0).toString();
        folder_paths->append(path);
    }
    return true;
}
// Registers a document; parameters: folder_id, document_time, document_path.
const auto INSERT_DOCUMENTS_SQL = QLatin1String(R"(
insert into documents(folder_id, document_time, document_path) values(?, ?, ?);
)");
// Updates the stored time of a document. Note the parameter order:
// document_time first, then the document id.
const auto UPDATE_DOCUMENT_TIME_SQL = QLatin1String(R"(
update documents set document_time = ? where id = ?;
)");
// Removes a document row by id.
const auto DELETE_DOCUMENTS_SQL = QLatin1String(R"(
delete from documents where id = ?;
)");
// Schema of the `documents` table; document_path is unique.
const auto DOCUMENTS_SQL = QLatin1String(R"(
create table documents(id integer primary key, folder_id integer, document_time integer, document_path varchar unique);
)");
// Looks up a document's id and stored time by its path.
const auto SELECT_DOCUMENT_SQL = QLatin1String(R"(
select id, document_time from documents where document_path = ?;
)");
// Lists the ids of all documents that belong to one folder.
const auto SELECT_DOCUMENTS_SQL = QLatin1String(R"(
select id from documents where folder_id = ?;
)");
// Lists the id and path of every document.
const auto SELECT_ALL_DOCUMENTS_SQL = QLatin1String(R"(
select id, document_path from documents;
)");
// Inserts a document row and stores the id of the new row in `*document_id`.
// Returns false, leaving `*document_id` untouched, if prepare or exec fails.
bool addDocument(QSqlQuery &q, int folder_id, qint64 document_time, const QString &document_path, int *document_id)
{
    if (!q.prepare(INSERT_DOCUMENTS_SQL))
        return false;
    q.addBindValue(folder_id);
    q.addBindValue(document_time);
    q.addBindValue(document_path);
    const bool inserted = q.exec();
    if (inserted)
        *document_id = q.lastInsertId().toInt();
    return inserted;
}
// Deletes the documents row with the given id.
// Returns false if the statement cannot be prepared or executed.
bool removeDocument(QSqlQuery &q, int document_id) {
    const bool prepared = q.prepare(DELETE_DOCUMENTS_SQL);
    if (prepared)
        q.addBindValue(document_id);
    return prepared && q.exec();
}
// Sets the stored document_time of document `id`.
// Returns false if the statement cannot be prepared or executed.
//
// Values are bound in the order the placeholders appear in
// UPDATE_DOCUMENT_TIME_SQL ("set document_time = ? where id = ?"): time
// first, then id. They used to be bound the other way round, so the update
// targeted the row whose id equalled the timestamp and stored the id as time.
bool updateDocument(QSqlQuery &q, int id, qint64 document_time)
{
    if (!q.prepare(UPDATE_DOCUMENT_TIME_SQL))
        return false;
    q.addBindValue(document_time);
    q.addBindValue(id);
    return q.exec();
}
// Looks up the document registered under `document_path`.
// `*id` and `*document_time` are written only when a row is found, so callers
// can pre-set them to sentinels to detect "not found".
// Returns false only if the query cannot be prepared or executed.
bool selectDocument(QSqlQuery &q, const QString &document_path, int *id, qint64 *document_time) {
    if (!q.prepare(SELECT_DOCUMENT_SQL))
        return false;
    q.addBindValue(document_path);
    if (!q.exec())
        return false;
    Q_ASSERT(q.size() < 2);
    if (!q.next())
        return true;
    *id = q.value(0).toInt();
    *document_time = q.value(1).toLongLong();
    return true;
}
// Appends to `documentIds` the id of every document that belongs to
// `folder_id`.
// Returns false (leaving `documentIds` untouched) if the query cannot be
// prepared or executed.
bool selectDocuments(QSqlQuery &q, int folder_id, QList<int> *documentIds) {
    if (!q.prepare(SELECT_DOCUMENTS_SQL))
        return false;
    q.addBindValue(folder_id);
    if (q.exec()) {
        while (q.next()) {
            const int documentId = q.value(0).toInt();
            documentIds->append(documentId);
        }
        return true;
    }
    return false;
}
// Opens (creating if necessary) the localdocs SQLite database as the default
// connection and creates the schema when the `chunks` table is not present.
//
// The file is named localdocs_v<LOCALDOCS_VERSION>.db and its name is appended
// directly to Download::downloadLocalModelsPath().
// NOTE(review): this presumably relies on that path ending with a separator —
// confirm.
//
// Returns a default-constructed QSqlError on success, otherwise the error of
// the step that failed.
QSqlError initDb()
{
    const QString fileName = QString("localdocs_v%1.db").arg(LOCALDOCS_VERSION);
    const QString dbPath = Download::globalInstance()->downloadLocalModelsPath() + fileName;

    QSqlDatabase db = QSqlDatabase::addDatabase("QSQLITE");
    db.setDatabaseName(dbPath);
    if (!db.open())
        return db.lastError();

    // An existing `chunks` table is taken to mean the schema is already there.
    if (db.tables().contains("chunks", Qt::CaseInsensitive))
        return QSqlError();

    QSqlQuery q;
    for (const auto &schema : { CHUNKS_SQL, FTS_CHUNKS_SQL, COLLECTIONS_SQL, FOLDERS_SQL, DOCUMENTS_SQL }) {
        if (!q.exec(schema))
            return q.lastError();
    }

#if defined(DEBUG_EXAMPLE)
    // Add a folder
    QString folder_path = "/example/folder";
    int folder_id;
    if (!addFolderToDB(q, folder_path, &folder_id)) {
        qDebug() << "Error adding folder:" << q.lastError().text();
        return q.lastError();
    }

    // Add a collection
    QString collection_name = "Example Collection";
    if (!addCollection(q, collection_name, folder_id)) {
        qDebug() << "Error adding collection:" << q.lastError().text();
        return q.lastError();
    }

    // Add a document
    int document_time = 123456789;
    int document_id;
    QString document_path = "/example/folder/document1.txt";
    if (!addDocument(q, folder_id, document_time, document_path, &document_id)) {
        qDebug() << "Error adding document:" << q.lastError().text();
        return q.lastError();
    }

    // Add chunks to the document
    QString chunk_text1 = "This is an example chunk.";
    QString chunk_text2 = "Another example chunk.";
    QString embedding_path = "/example/embeddings/embedding1.bin";
    int embedding_id = 1;
    if (!addChunk(q, document_id, 1, chunk_text1, embedding_id, embedding_path) ||
        !addChunk(q, document_id, 2, chunk_text2, embedding_id, embedding_path)) {
        qDebug() << "Error adding chunks:" << q.lastError().text();
        return q.lastError();
    }

    // Perform a search
    QList<QString> collection_names = {collection_name};
    QString search_text = "example";
    if (!selectChunk(q, collection_names, search_text)) {
        qDebug() << "Error selecting chunks:" << q.lastError().text();
        return q.lastError();
    }

    // Print the results
    printResults(q);
#endif

    return QSqlError();
}
// Creates the database object without a parent, moves it (together with the
// file system watcher it parents) to m_dbThread and starts that thread.
// Database::start() is connected to the thread's started() signal before the
// thread is launched.
Database::Database()
    : QObject(nullptr)
    , m_watcher(new QFileSystemWatcher(this))
{
    moveToThread(&m_dbThread);
    m_dbThread.setObjectName("database");
    connect(&m_dbThread, &QThread::started, this, &Database::start);
    m_dbThread.start();
}
// Logs a warning about a document that could not be processed and, if more
// documents are queued, schedules the next scanQueue() run through a
// zero-delay timer.
void Database::handleDocumentErrorAndScheduleNext(const QString &errorMessage,
    int document_id, const QString &document_path, const QSqlError &error)
{
    qWarning() << errorMessage << document_id << document_path << error.text();

    const bool moreWork = !m_docsToScan.isEmpty();
    if (moreWork)
        QTimer::singleShot(0, this, &Database::scanQueue);
}
// Splits the text read from `stream` into chunks of whitespace-separated
// words and stores each chunk for `document_id` via addChunk().
//
// A chunk is flushed once its joined length (characters plus one space
// between words) reaches chunkSize, or when the stream is exhausted.
// Chunk ids start at 1. A failed insert is logged and chunking continues.
//
// Empty reads are ignored: when the text ends in whitespace the final
// `stream >> word` yields an empty string, which previously produced a chunk
// with a trailing space, or an entirely empty chunk.
void Database::chunkStream(QTextStream &stream, int document_id)
{
    const int chunkSize = 256;
    int chunk_id = 0;
    int charCount = 0;
    QList<QString> words;
    QSqlQuery q; // reused for every insert; addChunk() re-prepares it

    while (!stream.atEnd()) {
        QString word;
        stream >> word;
        if (!word.isEmpty()) {
            charCount += word.length();
            words.append(word);
        }
        if (words.isEmpty())
            continue; // nothing pending, so nothing to flush

        if (charCount + words.size() - 1 >= chunkSize || stream.atEnd()) {
            const QString chunk = words.join(" ");
            if (!addChunk(q,
                    document_id,
                    ++chunk_id,
                    chunk,
                    0 /*embedding_id*/,
                    QString() /*embedding_path*/
                )) {
                qWarning() << "ERROR: Could not insert chunk into db" << q.lastError();
            }
            words.clear();
            charCount = 0;
        }
    }
}
void Database::scanQueue()
{
if (m_docsToScan.isEmpty())
return;
DocumentInfo info = m_docsToScan.dequeue();
// Update info
info.doc.stat();
// If the doc has since been deleted or no longer readable, then we schedule more work and return
// leaving the cleanup for the cleanup handler
if (!info.doc.exists() || !info.doc.isReadable()) {
if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue);
return;
}
const int folder_id = info.folder;
const qint64 document_time = info.doc.fileTime(QFile::FileModificationTime).toMSecsSinceEpoch();
const QString document_path = info.doc.canonicalFilePath();
#if defined(DEBUG)
qDebug() << "scanning document" << document_path;
#endif
// Check and see if we already have this document
QSqlQuery q;
int existing_id = -1;
qint64 existing_time = -1;
if (!selectDocument(q, document_path, &existing_id, &existing_time)) {
return handleDocumentErrorAndScheduleNext("ERROR: Cannot select document",
existing_id, document_path, q.lastError());
}
// If we have the document, we need to compare the last modification time and if it is newer
// we must rescan the document, otherwise return
if (existing_id != -1) {
Q_ASSERT(existing_time != -1);
if (document_time == existing_time) {
// No need to rescan, but we do have to schedule next
if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue);
return;
} else {
if (!removeChunksByDocumentId(q, existing_id)) {
return handleDocumentErrorAndScheduleNext("ERROR: Cannot remove chunks of document",
existing_id, document_path, q.lastError());
}
}
}
// Update the document_time for an existing document, or add it for the first time now
int document_id = existing_id;
if (document_id != -1) {
if (!updateDocument(q, document_id, document_time)) {
return handleDocumentErrorAndScheduleNext("ERROR: Could not update document_time",
document_id, document_path, q.lastError());
}
} else {
if (!addDocument(q, folder_id, document_time, document_path, &document_id)) {
return handleDocumentErrorAndScheduleNext("ERROR: Could not add document",
document_id, document_path, q.lastError());
}
}
QElapsedTimer timer;
timer.start();
QSqlDatabase::database().transaction();
Q_ASSERT(document_id != -1);
if (info.doc.suffix() == QLatin1String("pdf")) {
QPdfDocument doc;
if (QPdfDocument::Error::None != doc.load(info.doc.canonicalFilePath())) {
return handleDocumentErrorAndScheduleNext("ERROR: Could not load pdf",
document_id, document_path, q.lastError());
return;
}
QString text;
for (int i = 0; i < doc.pageCount(); ++i) {
const QPdfSelection selection = doc.getAllText(i);
text.append(selection.text());
}
QTextStream stream(&text);
chunkStream(stream, document_id);
} else {
QFile file(document_path);
if (!file.open( QIODevice::ReadOnly)) {
return handleDocumentErrorAndScheduleNext("ERROR: Cannot open file for scanning",
existing_id, document_path, q.lastError());
}
QTextStream stream(&file);
chunkStream(stream, document_id);
file.close();
}
QSqlDatabase::database().commit();
#if defined(DEBUG)
qDebug() << "chunking" << document_path << "took" << timer.elapsed() << "ms";
#endif
if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue);
}
void Database::scanDocuments(int folder_id, const QString &folder_path)
{
#if defined(DEBUG)
qDebug() << "scanning folder for documents" << folder_path;
#endif
static const QList<QString> extensions { "txt", "doc", "docx", "pdf", "rtf", "odt", "html", "htm",
"xls", "xlsx", "csv", "ods", "ppt", "pptx", "odp", "xml", "json", "log", "md", "tex", "asc", "wks",
"wpd", "wps", "wri", "xhtml", "xht", "xslt", "yaml", "yml", "dtd", "sgml", "tsv", "strings", "resx",
"plist", "properties", "ini", "config", "bat", "sh", "ps1", "cmd", "awk", "sed", "vbs", "ics", "mht",
"mhtml", "epub", "djvu", "azw", "azw3", "mobi", "fb2", "prc", "lit", "lrf", "tcr", "pdb", "oxps",
"xps", "pages", "numbers", "key", "keynote", "abw", "zabw", "123", "wk1", "wk3", "wk4", "wk5", "wq1",
"wq2", "xlw", "xlr", "dif", "slk", "sylk", "wb1", "wb2", "wb3", "qpw", "wdb", "wks", "wku", "wr1",
"wrk", "xlk", "xlt", "xltm", "xltx", "xlsm", "xla", "xlam", "xll", "xld", "xlv", "xlw", "xlc", "xlm",
"xlt", "xln" };
QDir dir(folder_path);
Q_ASSERT(dir.exists());
Q_ASSERT(dir.isReadable());
QDirIterator it(folder_path, QDir::Readable | QDir::Files, QDirIterator::Subdirectories);
while (it.hasNext()) {
it.next();
QFileInfo fileInfo = it.fileInfo();
if (fileInfo.isDir()) {
addFolderToWatch(fileInfo.canonicalFilePath());
continue;
}
if (!extensions.contains(fileInfo.suffix()))
continue;
DocumentInfo info;
info.folder = folder_id;
info.doc = fileInfo;
m_docsToScan.enqueue(info);
}
emit docsToScanChanged();
}
void Database::start()
{
connect(m_watcher, &QFileSystemWatcher::directoryChanged, this, &Database::directoryChanged);
connect(this, &Database::docsToScanChanged, this, &Database::scanQueue);
if (!QSqlDatabase::drivers().contains("QSQLITE")) {
qWarning() << "ERROR: missing sqllite driver";
} else {
QSqlError err = initDb();
if (err.type() != QSqlError::NoError)
qWarning() << "ERROR: initializing db" << err.text();
}
addCurrentFolders();
}
void Database::addCurrentFolders()
{
#if defined(DEBUG)
qDebug() << "addCurrentFolders";
#endif
QSqlQuery q;
QList<CollectionItem> collections;
if (!selectAllFromCollections(q, &collections)) {
qWarning() << "ERROR: Cannot select collections" << q.lastError();
return;
}
for (auto i : collections)
addFolder(i.collection, i.folder_path);
}
// Reloads all collection/folder links from the database and publishes them
// through collectionListUpdated(). Nothing is emitted if the query fails.
void Database::updateCollectionList()
{
#if defined(DEBUG)
    qDebug() << "updateCollectionList";
#endif

    QSqlQuery q;
    QList<CollectionItem> collections;
    if (selectAllFromCollections(q, &collections)) {
        emit collectionListUpdated(collections);
        return;
    }
    qWarning() << "ERROR: Cannot select collections" << q.lastError();
}
void Database::addFolder(const QString &collection, const QString &path)
{
QFileInfo info(path);
if (!info.exists() || !info.isReadable()) {
qWarning() << "ERROR: Cannot add folder that doesn't exist or not readable" << path;
return;
}
QSqlQuery q;
int folder_id = -1;
// See if the folder exists in the db
if (!selectFolder(q, path, &folder_id)) {
qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError();
return;
}
// Add the folder
if (folder_id == -1 && !addFolderToDB(q, path, &folder_id)) {
qWarning() << "ERROR: Cannot add folder to db with path" << path << q.lastError();
return;
}
Q_ASSERT(folder_id != -1);
// See if the folder has already been added to the collection
QList<int> folders;
if (!selectFoldersFromCollection(q, collection, &folders)) {
qWarning() << "ERROR: Cannot select folders from collections" << collection << q.lastError();
return;
}
if (!folders.contains(folder_id) && !addCollection(q, collection, folder_id)) {
qWarning() << "ERROR: Cannot add folder to collection" << collection << path << q.lastError();
return;
}
if (!addFolderToWatch(path))
return;
scanDocuments(folder_id, path);
updateCollectionList();
}
// Removes the folder at `path` from `collection`.
// Resolves the folder id from the path and delegates to
// removeFolderInternal(). If the path is unknown to the database it is only
// dropped from the file system watcher.
void Database::removeFolder(const QString &collection, const QString &path)
{
#if defined(DEBUG)
    qDebug() << "removeFolder" << path;
#endif

    QSqlQuery q;
    int folder_id = -1;

    // See if the folder exists in the db
    const bool looked_up = selectFolder(q, path, &folder_id);
    if (!looked_up) {
        qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError();
        return;
    }

    // If we don't have a folder_id in the db, then something bad has happened
    Q_ASSERT(folder_id != -1);
    if (folder_id != -1) {
        removeFolderInternal(collection, folder_id, path);
        return;
    }

    qWarning() << "ERROR: Collected folder does not exist in db" << path;
    m_watcher->removePath(path);
}
void Database::removeFolderInternal(const QString &collection, int folder_id, const QString &path)
{
// Determine if the folder is used by more than one collection
QSqlQuery q;
QList<QString> collections;
if (!selectCollectionsFromFolder(q, folder_id, &collections)) {
qWarning() << "ERROR: Cannot select collections from folder" << folder_id << q.lastError();
return;
}
// Remove it from the collections
if (!removeCollection(q, collection, folder_id)) {
qWarning() << "ERROR: Cannot remove collection" << collection << folder_id << q.lastError();
return;
}
// If the folder is associated with more than one collection, then return
if (collections.count() > 1)
return;
// First remove all upcoming jobs associated with this folder by performing an opt-in filter
QQueue<DocumentInfo> docsToScan;
for (DocumentInfo info : m_docsToScan) {
if (info.folder == folder_id)
continue;
docsToScan.append(info);
}
m_docsToScan = docsToScan;
emit docsToScanChanged();
// Get a list of all documents associated with folder
QList<int> documentIds;
if (!selectDocuments(q, folder_id, &documentIds)) {
qWarning() << "ERROR: Cannot select documents" << folder_id << q.lastError();
return;
}
// Remove all chunks and documents associated with this folder
for (int document_id : documentIds) {
if (!removeChunksByDocumentId(q, document_id)) {
qWarning() << "ERROR: Cannot remove chunks of document_id" << document_id << q.lastError();
return;
}
if (!removeDocument(q, document_id)) {
qWarning() << "ERROR: Cannot remove document_id" << document_id << q.lastError();
return;
}
}
if (!removeFolderFromDB(q, folder_id)) {
qWarning() << "ERROR: Cannot remove folder_id" << folder_id << q.lastError();
return;
}
removeFolderFromWatch(path);
updateCollectionList();
}
// Starts watching `path` for directory changes.
// Returns true if the path is being watched when the call returns, false
// (after logging a warning) if the watcher refused it.
//
// A path that is already watched counts as success. QFileSystemWatcher
// reports such a path as a failure, which used to make addFolder() abort
// before scanning and publishing when the same folder was added to a second
// collection.
bool Database::addFolderToWatch(const QString &path)
{
#if defined(DEBUG)
    qDebug() << "addFolderToWatch" << path;
#endif
    if (m_watcher->directories().contains(path))
        return true;

    if (!m_watcher->addPath(path)) {
        qWarning() << "ERROR: Cannot add path to file watcher:" << path;
        return false;
    }
    return true;
}
// Stops watching `path`.
// Returns whether the watcher removed the path; a refusal is logged.
bool Database::removeFolderFromWatch(const QString &path)
{
#if defined(DEBUG)
    qDebug() << "removeFolderFromWatch" << path;
#endif
    const bool removed = m_watcher->removePath(path);
    if (!removed)
        qWarning() << "ERROR: Cannot remove path from file watcher:" << path;
    return removed;
}
void Database::retrieveFromDB(const QList<QString> &collections, const QString &text)
{
#if defined(DEBUG)
qDebug() << "retrieveFromDB" << collections << text;
#endif
QSqlQuery q;
if (!selectChunk(q, collections, text)) {
qDebug() << "ERROR: selecting chunks:" << q.lastError().text();
return;
}
QList<QString> results;
while (q.next()) {
int rowid = q.value(0).toInt();
QString collection_name = q.value(1).toString();
QString chunk_text = q.value(2).toString();
results.append(chunk_text);
#if defined(DEBUG)
qDebug() << "retrieve rowid:" << rowid
<< "collection_name:" << collection_name
<< "chunk_text:" << chunk_text;
#endif
}
emit retrieveResult(results);
}
void Database::cleanDB()
{
#if defined(DEBUG)
qDebug() << "cleanDB";
#endif
// Scan all folders in db to make sure they still exist
QSqlQuery q;
QList<CollectionItem> collections;
if (!selectAllFromCollections(q, &collections)) {
qWarning() << "ERROR: Cannot select collections" << q.lastError();
return;
}
for (auto i : collections) {
// Find the path for the folder
QFileInfo info(i.folder_path);
if (!info.exists() || !info.isReadable()) {
#if defined(DEBUG)
qDebug() << "clean db removing folder" << i.folder_id << i.folder_path;
#endif
removeFolderInternal(i.collection, i.folder_id, i.folder_path);
}
}
// Scan all documents in db to make sure they still exist
if (!q.prepare(SELECT_ALL_DOCUMENTS_SQL)) {
qWarning() << "ERROR: Cannot prepare sql for select all documents" << q.lastError();
return;
}
if (!q.exec()) {
qWarning() << "ERROR: Cannot exec sql for select all documents" << q.lastError();
return;
}
while (q.next()) {
int document_id = q.value(0).toInt();
QString document_path = q.value(1).toString();
QFileInfo info(document_path);
if (info.exists() && info.isReadable())
continue;
#if defined(DEBUG)
qDebug() << "clean db removing document" << document_id << document_path;
#endif
// Remove all chunks and documents that either don't exist or have become unreadable
QSqlQuery query;
if (!removeChunksByDocumentId(query, document_id)) {
qWarning() << "ERROR: Cannot remove chunks of document_id" << document_id << query.lastError();
}
if (!removeDocument(query, document_id)) {
qWarning() << "ERROR: Cannot remove document_id" << document_id << query.lastError();
}
}
updateCollectionList();
}
// Slot for QFileSystemWatcher::directoryChanged.
// Resolves the folder id for `path`, cleans stale entries out of the database
// and rescans the folder. A path unknown to the database is only dropped from
// the watcher.
void Database::directoryChanged(const QString &path)
{
#if defined(DEBUG)
    qDebug() << "directoryChanged" << path;
#endif

    QSqlQuery q;
    int folder_id = -1;

    // Lookup the folder_id in the db
    const bool looked_up = selectFolder(q, path, &folder_id);
    if (!looked_up) {
        qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError();
        return;
    }

    // If we don't have a folder_id in the db, then something bad has happened
    Q_ASSERT(folder_id != -1);
    if (folder_id != -1) {
        // Clean the database
        cleanDB();

        // Rescan the documents associated with the folder
        scanDocuments(folder_id, path);
        return;
    }

    qWarning() << "ERROR: Watched folder does not exist in db" << path;
    m_watcher->removePath(path);
}

64
gpt4all-chat/database.h Normal file
View File

@ -0,0 +1,64 @@
#ifndef DATABASE_H
#define DATABASE_H
#include <QObject>
#include <QtSql>
#include <QQueue>
#include <QFileInfo>
#include <QThread>
#include <QFileSystemWatcher>
// One pending scan job: a file together with the id of the folder it was
// found under.
struct DocumentInfo
{
    // Id of the owning folder; -1 until assigned, matching the "not set"
    // sentinel used by CollectionItem::folder_id. It was previously left
    // uninitialized.
    int folder = -1;
    // The file to scan.
    QFileInfo doc;
};
// One collection/folder link as read from the database.
struct CollectionItem {
// Name of the collection.
QString collection;
// Path of the folder linked to the collection.
QString folder_path;
// Id of the folder; -1 means "not set".
int folder_id = -1;
};
// Makes CollectionItem known to Qt's meta-type system.
Q_DECLARE_METATYPE(CollectionItem)
// Owns the localdocs SQLite database and the file system watcher. The
// constructor moves the object to its own thread (m_dbThread) and starts it.
class Database : public QObject
{
Q_OBJECT
public:
// Moves the object to m_dbThread and starts that thread.
Database();
public Q_SLOTS:
// Processes one queued document and reschedules itself while work remains.
void scanQueue();
// Queues the eligible files found under folder_path for scanning.
void scanDocuments(int folder_id, const QString &folder_path);
// Links the folder at `path` to `collection`, watches it and scans it.
void addFolder(const QString &collection, const QString &path);
// Unlinks the folder at `path` from `collection`.
void removeFolder(const QString &collection, const QString &path);
// Searches the given collections for `text`; emits retrieveResult().
void retrieveFromDB(const QList<QString> &collections, const QString &text);
// Removes folders and documents that no longer exist or are unreadable.
void cleanDB();
Q_SIGNALS:
// Emitted when the scan queue has been changed.
void docsToScanChanged();
// Carries the chunk texts found by retrieveFromDB().
void retrieveResult(const QList<QString> &result);
// Carries the current collection/folder links.
void collectionListUpdated(const QList<CollectionItem> &collectionList);
private Q_SLOTS:
// Connected to m_dbThread's started() signal; initialises the database.
void start();
// Connected to the watcher's directoryChanged signal.
void directoryChanged(const QString &path);
// Add/remove a path on the file system watcher; return whether it worked.
bool addFolderToWatch(const QString &path);
bool removeFolderFromWatch(const QString &path);
// Re-adds every folder already stored in the database.
void addCurrentFolders();
// Reloads the collection list and emits collectionListUpdated().
void updateCollectionList();
private:
// Shared implementation behind removeFolder() and cleanDB().
void removeFolderInternal(const QString &collection, int folder_id, const QString &path);
// Splits the stream's text into chunks and stores them for document_id.
void chunkStream(QTextStream &stream, int document_id);
// Logs a per-document failure and schedules the next queue run.
void handleDocumentErrorAndScheduleNext(const QString &errorMessage,
int document_id, const QString &document_path, const QSqlError &error);
private:
// Documents waiting to be scanned.
QQueue<DocumentInfo> m_docsToScan;
// NOTE(review): not referenced by the database.cpp code shown alongside this header — confirm it is still needed.
QList<QString> m_retrieve;
// Thread this object is moved to by the constructor.
QThread m_dbThread;
// Watches added folders; created with this object as its parent.
QFileSystemWatcher *m_watcher;
};
#endif // DATABASE_H

File diff suppressed because it is too large Load Diff

View File

@ -1,80 +1,23 @@
#ifndef LOCALDOCS_H #ifndef LOCALDOCS_H
#define LOCALDOCS_H #define LOCALDOCS_H
#include "localdocsmodel.h"
#include "database.h"
#include <QObject> #include <QObject>
#include <QtSql>
#include <QQueue>
#include <QFileInfo>
#include <QThread>
#include <QFileSystemWatcher>
struct DocumentInfo
{
int folder;
QFileInfo doc;
};
struct CollectionInfo {
Q_GADGET
Q_PROPERTY(QString name MEMBER name)
public:
QString name;
QList<QString> folders;
};
Q_DECLARE_METATYPE(CollectionInfo)
class Database : public QObject
{
Q_OBJECT
public:
Database();
public Q_SLOTS:
void scanQueue();
void scanDocuments(int folder_id, const QString &folder_path);
void addFolder(const QString &collection, const QString &path);
void removeFolder(const QString &collection, const QString &path);
void retrieveFromDB(const QList<QString> &collections, const QString &text);
void cleanDB();
Q_SIGNALS:
void docsToScanChanged();
void retrieveResult(const QList<QString> &result);
void collectionListUpdated(const QList<CollectionInfo> &collectionList);
private Q_SLOTS:
void start();
void directoryChanged(const QString &path);
bool addFolderToWatch(const QString &path);
bool removeFolderFromWatch(const QString &path);
void addCurrentFolders();
void updateCollectionList();
private:
void removeFolderInternal(const QString &collection, int folder_id, const QString &path);
void chunkStream(QTextStream &stream, int document_id);
void handleDocumentErrorAndScheduleNext(const QString &errorMessage,
int document_id, const QString &document_path, const QSqlError &error);
private:
QQueue<DocumentInfo> m_docsToScan;
QList<QString> m_retrieve;
QThread m_dbThread;
QFileSystemWatcher *m_watcher;
};
class LocalDocs : public QObject class LocalDocs : public QObject
{ {
Q_OBJECT Q_OBJECT
Q_PROPERTY(QList<CollectionInfo> collectionList READ collectionList NOTIFY collectionListChanged) Q_PROPERTY(LocalDocsModel *localDocsModel READ localDocsModel NOTIFY localDocsModelChanged)
public: public:
static LocalDocs *globalInstance(); static LocalDocs *globalInstance();
QList<CollectionInfo> collectionList() const { return m_collectionList; } LocalDocsModel *localDocsModel() const { return m_localDocsModel; }
void addFolder(const QString &collection, const QString &path); Q_INVOKABLE void addFolder(const QString &collection, const QString &path);
void removeFolder(const QString &collection, const QString &path); Q_INVOKABLE void removeFolder(const QString &collection, const QString &path);
QList<QString> result() const { return m_retrieveResult; } QList<QString> result() const { return m_retrieveResult; }
void requestRetrieve(const QList<QString> &collections, const QString &text); void requestRetrieve(const QList<QString> &collections, const QString &text);
@ -84,16 +27,15 @@ Q_SIGNALS:
void requestRemoveFolder(const QString &collection, const QString &path); void requestRemoveFolder(const QString &collection, const QString &path);
void requestRetrieveFromDB(const QList<QString> &collections, const QString &text); void requestRetrieveFromDB(const QList<QString> &collections, const QString &text);
void receivedResult(); void receivedResult();
void collectionListChanged(); void localDocsModelChanged();
private Q_SLOTS: private Q_SLOTS:
void handleRetrieveResult(const QList<QString> &result); void handleRetrieveResult(const QList<QString> &result);
void handleCollectionListUpdated(const QList<CollectionInfo> &collectionList);
private: private:
LocalDocsModel *m_localDocsModel;
Database *m_database; Database *m_database;
QList<QString> m_retrieveResult; QList<QString> m_retrieveResult;
QList<CollectionInfo> m_collectionList;
private: private:
explicit LocalDocs(); explicit LocalDocs();

105
gpt4all-chat/localdocsdb.h Normal file
View File

@ -0,0 +1,105 @@
#ifndef LOCALDOCS_H
#define LOCALDOCS_H
#include "localdocsmodel.h"
#include <QObject>
#include <QtSql>
#include <QQueue>
#include <QFileInfo>
#include <QThread>
#include <QFileSystemWatcher>
// A single document waiting in the scan queue (see QQueue<DocumentInfo>
// m_docsToScan in Database).
struct DocumentInfo
{
    // Id of the folder the document was found under.
    // NOTE(review): presumably matches CollectionItem::folder_id — confirm
    // against database.cpp. Defaults to -1, the same "unset" value that
    // CollectionItem uses, so a default-constructed DocumentInfo never
    // carries an indeterminate id.
    int folder = -1;

    // File-system information for the document itself.
    QFileInfo doc;
};
// One row of the collection list: a collection name paired with a folder.
// LocalDocsModel exposes `collection` and `folder_path` to views through its
// "collection" and "folder_path" roles.
struct CollectionItem
{
    // Name of the collection this folder belongs to.
    QString collection;

    // Path of the folder that is part of the collection.
    QString folder_path;

    // Identifier of the folder.
    // NOTE(review): -1 presumably means "no id assigned yet" — confirm.
    int folder_id{-1};
};

// Makes CollectionItem known to Qt's meta-type system (QVariant storage).
// NOTE(review): presumably needed because QList<CollectionItem> travels
// through signals such as collectionListUpdated — confirm.
Q_DECLARE_METATYPE(CollectionItem)
class Database : public QObject
{
Q_OBJECT
public:
Database();
public Q_SLOTS:
void scanQueue();
void scanDocuments(int folder_id, const QString &folder_path);
void addFolder(const QString &collection, const QString &path);
void removeFolder(const QString &collection, const QString &path);
void retrieveFromDB(const QList<QString> &collections, const QString &text);
void cleanDB();
Q_SIGNALS:
void docsToScanChanged();
void retrieveResult(const QList<QString> &result);
void collectionListUpdated(const QList<CollectionItem> &collectionList);
private Q_SLOTS:
void start();
void directoryChanged(const QString &path);
bool addFolderToWatch(const QString &path);
bool removeFolderFromWatch(const QString &path);
void addCurrentFolders();
void updateCollectionList();
private:
void removeFolderInternal(const QString &collection, int folder_id, const QString &path);
void chunkStream(QTextStream &stream, int document_id);
void handleDocumentErrorAndScheduleNext(const QString &errorMessage,
int document_id, const QString &document_path, const QSqlError &error);
private:
QQueue<DocumentInfo> m_docsToScan;
QList<QString> m_retrieve;
QThread m_dbThread;
QFileSystemWatcher *m_watcher;
};
class LocalDocs : public QObject
{
Q_OBJECT
Q_PROPERTY(LocalDocsModel *localDocsModel READ localDocsModel NOTIFY localDocsModelChanged)
public:
static LocalDocs *globalInstance();
LocalDocsModel *localDocsModel() const { return m_localDocsModel; }
void addFolder(const QString &collection, const QString &path);
void removeFolder(const QString &collection, const QString &path);
QList<QString> result() const { return m_retrieveResult; }
void requestRetrieve(const QList<QString> &collections, const QString &text);
Q_SIGNALS:
void requestAddFolder(const QString &collection, const QString &path);
void requestRemoveFolder(const QString &collection, const QString &path);
void requestRetrieveFromDB(const QList<QString> &collections, const QString &text);
void receivedResult();
void localDocsModelChanged();
private Q_SLOTS:
void handleRetrieveResult(const QList<QString> &result);
void handleCollectionListUpdated(const QList<CollectionItem> &collectionList);
private:
LocalDocsModel *m_localDocsModel;
Database *m_database;
QList<QString> m_retrieveResult;
QList<CollectionItem> m_collectionList;
private:
explicit LocalDocs();
~LocalDocs() {}
friend class MyLocalDocs;
};
#endif // LOCALDOCS_H

View File

@ -0,0 +1,43 @@
#include "localdocsmodel.h"
// Creates an empty model; `parent` is passed straight to the
// QAbstractListModel base constructor.
LocalDocsModel::LocalDocsModel(QObject *parent)
: QAbstractListModel(parent)
{
}
int LocalDocsModel::rowCount(const QModelIndex &parent) const
{
Q_UNUSED(parent);
return m_collectionList.size();
}
// Returns the value stored for `role` at `index`.
//
// CollectionRole yields the item's collection name and FolderPathRole its
// folder path. An invalid or out-of-range index, or any other role, yields
// an empty QVariant.
QVariant LocalDocsModel::data(const QModelIndex &index, int role) const
{
    if (!index.isValid())
        return QVariant();

    const int row = index.row();
    if (row < 0 || row >= m_collectionList.size())
        return QVariant();

    const CollectionItem &entry = m_collectionList.at(row);
    if (role == CollectionRole)
        return entry.collection;
    if (role == FolderPathRole)
        return entry.folder_path;

    return QVariant();
}
// Maps each custom role to the name under which delegates see it
// ("collection" and "folder_path").
QHash<int, QByteArray> LocalDocsModel::roleNames() const
{
    return QHash<int, QByteArray>{
        { CollectionRole, "collection" },
        { FolderPathRole, "folder_path" },
    };
}
// Replaces the model's entire contents with `collectionList`.
void LocalDocsModel::handleCollectionListUpdated(const QList<CollectionItem> &collectionList)
{
// The swap is wrapped in a model reset so attached views discard all
// previous rows and re-read everything.
beginResetModel();
m_collectionList = collectionList;
endResetModel();
}

View File

@ -0,0 +1,29 @@
#ifndef LOCALDOCSMODEL_H
#define LOCALDOCSMODEL_H
#include <QAbstractListModel>
#include "database.h"
// List model over CollectionItem entries. Each row exposes the item's
// collection name and folder path through the roles declared below.
class LocalDocsModel : public QAbstractListModel
{
    Q_OBJECT

public:
    // Custom roles; numbering starts just above Qt::UserRole.
    enum Roles {
        CollectionRole = Qt::UserRole + 1,
        FolderPathRole
    };

    explicit LocalDocsModel(QObject *parent = nullptr);

    // QAbstractListModel interface.
    int rowCount(const QModelIndex &parent = QModelIndex()) const override;
    QVariant data(const QModelIndex &index, int role) const override;
    QHash<int, QByteArray> roleNames() const override;

public Q_SLOTS:
    // Replaces the whole list held by the model with `collectionList`.
    void handleCollectionListUpdated(const QList<CollectionItem> &collectionList);

private:
    QList<CollectionItem> m_collectionList;
};
#endif // LOCALDOCSMODEL_H

View File

@ -0,0 +1,98 @@
import QtCore
import QtQuick
import QtQuick.Controls
import QtQuick.Controls.Basic
import QtQuick.Dialogs
import QtQuick.Layouts
// Modal dialog that collects a collection name and a folder path.
// Callers read the `collection` and `folder_path` properties once the
// dialog's accepted() signal fires.
Dialog {
    id: addCollectionDialog
    anchors.centerIn: parent
    opacity: 0.9
    padding: 20
    modal: true

    Theme {
        id: theme
    }

    // Results of the dialog; both must be non-empty before "Add" is enabled.
    property string collection: ""
    property string folder_path: ""

    FolderDialog {
        id: folderDialog
        title: "Please choose a directory"
        currentFolder: StandardPaths.writableLocation(StandardPaths.DocumentsLocation)
        onAccepted: {
            addCollectionDialog.folder_path = selectedFolder
        }
    }

    Row {
        anchors.top: parent.top
        anchors.left: parent.left
        anchors.right: parent.right
        height: childrenRect.height
        spacing: 20

        TextField {
            // Named collectionField so the id does not shadow the dialog's
            // `collection` property.
            id: collectionField
            implicitWidth: 200
            horizontalAlignment: Text.AlignJustify
            color: theme.textColor
            background: Rectangle {
                implicitWidth: 150
                color: theme.backgroundLighter
                radius: 10
            }
            padding: 10
            placeholderText: qsTr("Collection name...")
            placeholderTextColor: theme.mutedTextColor
            ToolTip.text: qsTr("Name of the collection to add (Required)")
            ToolTip.visible: hovered
            onEditingFinished: {
                addCollectionDialog.collection = text
            }
            Accessible.role: Accessible.EditableText
            Accessible.name: collectionField.text
            Accessible.description: ToolTip.text
        }

        MyTextField {
            id: folderLabel
            text: folder_path
            readOnly: true
            color: theme.textColor
            implicitWidth: 300
            padding: 10
            placeholderText: qsTr("Folder path...")
            placeholderTextColor: theme.mutedTextColor
            ToolTip.text: qsTr("Folder path to documents (Required)")
            ToolTip.visible: hovered
        }

        MyButton {
            text: qsTr("Browse")
            onClicked: {
                folderDialog.open();
            }
        }

        MyButton {
            text: qsTr("Add")
            enabled: addCollectionDialog.collection !== "" && addCollectionDialog.folder_path != ""
            Accessible.role: Accessible.Button
            Accessible.name: text
            Accessible.description: qsTr("Add button")
            DialogButtonBox.buttonRole: DialogButtonBox.AcceptRole
            // This button lives in a plain Row rather than a DialogButtonBox,
            // so the attached buttonRole alone does not close the dialog.
            // Accept explicitly so accepted() is emitted.
            onClicked: {
                addCollectionDialog.accept()
            }
        }
    }

    background: Rectangle {
        anchors.fill: parent
        color: theme.backgroundDarkest
        border.width: 1
        border.color: theme.dialogBorder
        radius: 10
    }
}

View File

@ -2,35 +2,175 @@ import QtQuick
import QtQuick.Controls import QtQuick.Controls
import QtQuick.Controls.Basic import QtQuick.Controls.Basic
import QtQuick.Layouts import QtQuick.Layouts
import localdocs
GridLayout { Item {
columns: 2 AddCollectionDialog {
rowSpacing: 10 id: addCollectionDialog
columnSpacing: 10
Label {
text: qsTr("Collections:")
color: theme.textColor
Layout.row: 1
Layout.column: 0
} }
RowLayout { Connections {
spacing: 10 target: addCollectionDialog
Layout.row: 1 function onAccepted() {
Layout.column: 1 LocalDocs.addFolder(addCollectionDialog.collection, addCollectionDialog.folder_path)
MyComboBox {
id: comboBox
Layout.minimumWidth: 350
} }
MyButton { }
text: "Add"
GridLayout {
id: gridLayout
columns: 2
rowSpacing: 10
columnSpacing: 10
Label {
id: contextItemsPerPrompt
text: qsTr("Context items per prompt:")
color: theme.textColor
Layout.row: 0
Layout.column: 0
} }
MyButton {
text: "Remove" MyTextField {
Layout.row: 0
Layout.column: 1
} }
MyButton {
text: "Rename" Label {
id: chunkLabel
text: qsTr("Chunksize:")
color: theme.textColor
Layout.row: 1
Layout.column: 0
}
MyTextField {
id: chunkSizeTextField
Layout.row: 1
Layout.column: 1
}
}
ScrollView {
id: scrollView
anchors.top: gridLayout.bottom
anchors.topMargin: 20
anchors.bottom: newCollectionButton.top
anchors.bottomMargin: 10
anchors.left: parent.left
anchors.right: parent.right
clip: true
contentHeight: 300
ScrollBar.vertical.policy: ScrollBar.AlwaysOn
background: Rectangle {
color: theme.backgroundLighter
}
ListView {
id: listView
model: LocalDocs.localDocsModel
boundsBehavior: Flickable.StopAtBounds
headerPositioning: ListView.InlineHeader
header: Rectangle {
width: listView.width
height: collectionLabel.height + 40
color: theme.backgroundDark
Label {
id: collectionLabel
anchors.verticalCenter: parent.verticalCenter
anchors.left: parent.left
anchors.margins: 20
text: "Collection"
color: theme.textColor
font.bold: true
width: 200
}
Label {
anchors.left: collectionLabel.right
anchors.margins: 20
anchors.verticalCenter: parent.verticalCenter
text: "Folder"
color: theme.textColor
font.bold: true
}
}
delegate: Rectangle {
id: item
width: listView.width
height: buttons.height + 20
color: index % 2 === 0 ? theme.backgroundLight : theme.backgroundLighter
property bool removing: false
Text {
id: collectionId
anchors.verticalCenter: parent.verticalCenter
anchors.left: parent.left
anchors.margins: 20
text: collection
elide: Text.ElideRight
color: theme.textColor
width: 200
}
Text {
id: folderId
anchors.left: collectionId.right
anchors.margins: 20
anchors.verticalCenter: parent.verticalCenter
text: folder_path
elide: Text.ElideRight
color: theme.textColor
}
Item {
id: buttons
anchors.right: parent.right
anchors.verticalCenter: parent.verticalCenter
anchors.margins: 20
width: childrenRect.width
height: Math.max(removeButton.height, busyIndicator.height)
MyButton {
id: removeButton
anchors.verticalCenter: parent.verticalCenter
text: qsTr("Remove")
visible: !item.removing
onClicked: {
item.removing = true
LocalDocs.removeFolder(collection, folder_path)
}
}
BusyIndicator {
id: busyIndicator
anchors.verticalCenter: parent.verticalCenter
visible: item.removing
}
}
}
}
}
MyButton {
id: newCollectionButton
anchors.right: parent.right
anchors.bottom: parent.bottom
text: qsTr("New collection")
onClicked: {
addCollectionDialog.open();
}
}
MyButton {
id: restoreDefaultsButton
anchors.left: parent.left
anchors.bottom: parent.bottom
text: qsTr("Restore Defaults")
Accessible.role: Accessible.Button
Accessible.name: text
Accessible.description: qsTr("Restores the settings dialog to a default state")
onClicked: {
// settingsDialog.restoreGenerationDefaults()
} }
} }
} }

View File

@ -234,21 +234,28 @@ Dialog {
anchors.top: parent.top anchors.top: parent.top
anchors.left: parent.left anchors.left: parent.left
anchors.right: parent.right anchors.right: parent.right
height: 1 ? localDocsButton.checked : 0 height: localDocsButton.checked
color: theme.tabBorder
}
Rectangle {
anchors.bottom: parent.bottom
anchors.left: parent.left
anchors.right: parent.right
height: !localDocsButton.checked
color: theme.tabBorder color: theme.tabBorder
} }
Rectangle { Rectangle {
anchors.top: parent.top anchors.top: parent.top
anchors.bottom: parent.bottom anchors.bottom: parent.bottom
anchors.left: parent.left anchors.left: parent.left
width: 1 ? localDocsButton.checked : 0 width: localDocsButton.checked
color: theme.tabBorder color: theme.tabBorder
} }
Rectangle { Rectangle {
anchors.top: parent.top anchors.top: parent.top
anchors.bottom: parent.bottom anchors.bottom: parent.bottom
anchors.right: parent.right anchors.right: parent.right
width: 1 ? localDocsButton.checked : 0 width: localDocsButton.checked
color: theme.tabBorder color: theme.tabBorder
} }
} }