diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index b3e048d0..e525d163 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -62,9 +62,10 @@ qt_add_executable(chat chatllm.h chatllm.cpp chatmodel.h chatlistmodel.h chatlistmodel.cpp chatgpt.h chatgpt.cpp + database.h database.cpp download.h download.cpp network.h network.cpp - localdocs.h localdocs.cpp + localdocs.h localdocs.cpp localdocsmodel.h localdocsmodel.cpp llm.h llm.cpp server.h server.cpp sysinfo.h @@ -77,6 +78,7 @@ qt_add_qml_module(chat main.qml qml/ChatDrawer.qml qml/LocalDocs.qml + qml/AddCollectionDialog.qml qml/ModelDownloaderDialog.qml qml/NetworkDialog.qml qml/NewVersionDialog.qml diff --git a/gpt4all-chat/database.cpp b/gpt4all-chat/database.cpp new file mode 100644 index 00000000..0c6ff83b --- /dev/null +++ b/gpt4all-chat/database.cpp @@ -0,0 +1,988 @@ +#include "database.h" +#include "download.h" + +#include +#include + +//#define DEBUG +//#define DEBUG_EXAMPLE + +#define LOCALDOCS_VERSION 0 + +const auto INSERT_CHUNK_SQL = QLatin1String(R"( + insert into chunks(document_id, chunk_id, chunk_text, + embedding_id, embedding_path) values(?, ?, ?, ?, ?); + )"); + +const auto INSERT_CHUNK_FTS_SQL = QLatin1String(R"( + insert into chunks_fts(document_id, chunk_id, chunk_text, + embedding_id, embedding_path) values(?, ?, ?, ?, ?); + )"); + +const auto DELETE_CHUNKS_SQL = QLatin1String(R"( + delete from chunks WHERE document_id = ?; + )"); + +const auto DELETE_CHUNKS_FTS_SQL = QLatin1String(R"( + delete from chunks_fts WHERE document_id = ?; + )"); + +const auto CHUNKS_SQL = QLatin1String(R"( + create table chunks(document_id integer, chunk_id integer, chunk_text varchar, + embedding_id integer, embedding_path varchar); + )"); + +const auto FTS_CHUNKS_SQL = QLatin1String(R"( + create virtual table chunks_fts using fts5(document_id unindexed, chunk_id unindexed, chunk_text, + embedding_id unindexed, embedding_path unindexed, tokenize="trigram"); + )"); + 
+const auto SELECT_SQL = QLatin1String(R"( + select chunks_fts.rowid, chunks_fts.document_id, chunks_fts.chunk_text + from chunks_fts + join documents ON chunks_fts.document_id = documents.id + join folders ON documents.folder_id = folders.id + join collections ON folders.id = collections.folder_id + where chunks_fts match ? and collections.collection_name in (%1) + order by bm25(chunks_fts) desc + limit 3; + )"); + +bool addChunk(QSqlQuery &q, int document_id, int chunk_id, const QString &chunk_text, int embedding_id, + const QString &embedding_path) +{ + { + if (!q.prepare(INSERT_CHUNK_SQL)) + return false; + q.addBindValue(document_id); + q.addBindValue(chunk_id); + q.addBindValue(chunk_text); + q.addBindValue(embedding_id); + q.addBindValue(embedding_path); + if (!q.exec()) + return false; + } + { + if (!q.prepare(INSERT_CHUNK_FTS_SQL)) + return false; + q.addBindValue(document_id); + q.addBindValue(chunk_id); + q.addBindValue(chunk_text); + q.addBindValue(embedding_id); + q.addBindValue(embedding_path); + if (!q.exec()) + return false; + } + return true; +} + +bool removeChunksByDocumentId(QSqlQuery &q, int document_id) +{ + { + if (!q.prepare(DELETE_CHUNKS_SQL)) + return false; + q.addBindValue(document_id); + if (!q.exec()) + return false; + } + + { + if (!q.prepare(DELETE_CHUNKS_FTS_SQL)) + return false; + q.addBindValue(document_id); + if (!q.exec()) + return false; + } + + return true; +} + +QStringList generateGrams(const QString &input, int N) +{ + // Remove common English punctuation using QRegularExpression + QRegularExpression punctuation(R"([.,;:!?'"()\-])"); + QString cleanedInput = input; + cleanedInput = cleanedInput.remove(punctuation); + + // Split the cleaned input into words using whitespace + QStringList words = cleanedInput.split(QRegularExpression("\\s+"), Qt::SkipEmptyParts); + N = qMin(words.size(), N); + + // Generate all possible N-grams + QStringList ngrams; + for (int i = 0; i < words.size() - (N - 1); ++i) { + QStringList 
currentNgram; + for (int j = 0; j < N; ++j) { + currentNgram.append("\"" + words[i + j] + "\""); + } + ngrams.append("NEAR(" + currentNgram.join(" ") + ", " + QString::number(N) + ")"); + } + return ngrams; +} + +bool selectChunk(QSqlQuery &q, const QList &collection_names, const QString &chunk_text) +{ + const int N_WORDS = chunk_text.split(QRegularExpression("\\s+")).size(); + for (int N = N_WORDS; N > 2; N--) { + // first try trigrams + QList text = generateGrams(chunk_text, N); + QString orText = text.join(" OR "); + const QString collection_names_str = collection_names.join("', '"); + const QString formatted_query = SELECT_SQL.arg("'" + collection_names_str + "'"); + if (!q.prepare(formatted_query)) + return false; + q.addBindValue(orText); + bool success = q.exec(); + if (!success) return false; + if (q.next()) { +#if defined(DEBUG) + qDebug() << "hit on" << N << "before" << chunk_text << "after" << orText; +#endif + q.previous(); + return true; + } + } + return true; +} + +void printResults(QSqlQuery &q) +{ + while (q.next()) { + int rowid = q.value(0).toInt(); + QString collection_name = q.value(1).toString(); + QString chunk_text = q.value(2).toString(); + + qDebug() << "rowid:" << rowid + << "collection_name:" << collection_name + << "chunk_text:" << chunk_text; + } +} + +const auto INSERT_COLLECTION_SQL = QLatin1String(R"( + insert into collections(collection_name, folder_id) values(?, ?); + )"); + +const auto DELETE_COLLECTION_SQL = QLatin1String(R"( + delete from collections where collection_name = ? 
and folder_id = ?; + )"); + +const auto COLLECTIONS_SQL = QLatin1String(R"( + create table collections(collection_name varchar, folder_id integer, unique(collection_name, folder_id)); + )"); + +const auto SELECT_FOLDERS_FROM_COLLECTIONS_SQL = QLatin1String(R"( + select folder_id from collections where collection_name = ?; + )"); + +const auto SELECT_COLLECTIONS_FROM_FOLDER_SQL = QLatin1String(R"( + select collection_name from collections where folder_id = ?; + )"); + +const auto SELECT_COLLECTIONS_SQL = QLatin1String(R"( + select c.collection_name, f.folder_path, f.id + from collections c + join folders f on c.folder_id = f.id + order by c.collection_name asc, f.folder_path asc; + )"); + +bool addCollection(QSqlQuery &q, const QString &collection_name, int folder_id) +{ + if (!q.prepare(INSERT_COLLECTION_SQL)) + return false; + q.addBindValue(collection_name); + q.addBindValue(folder_id); + return q.exec(); +} + +bool removeCollection(QSqlQuery &q, const QString &collection_name, int folder_id) +{ + if (!q.prepare(DELETE_COLLECTION_SQL)) + return false; + q.addBindValue(collection_name); + q.addBindValue(folder_id); + return q.exec(); +} + +bool selectFoldersFromCollection(QSqlQuery &q, const QString &collection_name, QList *folderIds) { + if (!q.prepare(SELECT_FOLDERS_FROM_COLLECTIONS_SQL)) + return false; + q.addBindValue(collection_name); + if (!q.exec()) + return false; + while (q.next()) + folderIds->append(q.value(0).toInt()); + return true; +} + +bool selectCollectionsFromFolder(QSqlQuery &q, int folder_id, QList *collections) { + if (!q.prepare(SELECT_COLLECTIONS_FROM_FOLDER_SQL)) + return false; + q.addBindValue(folder_id); + if (!q.exec()) + return false; + while (q.next()) + collections->append(q.value(0).toString()); + return true; +} + +bool selectAllFromCollections(QSqlQuery &q, QList *collections) { + if (!q.prepare(SELECT_COLLECTIONS_SQL)) + return false; + if (!q.exec()) + return false; + while (q.next()) { + CollectionItem i; + i.collection = 
q.value(0).toString(); + i.folder_path = q.value(1).toString(); + i.folder_id = q.value(0).toInt(); + collections->append(i); + } + return true; +} + +const auto INSERT_FOLDERS_SQL = QLatin1String(R"( + insert into folders(folder_path) values(?); + )"); + +const auto DELETE_FOLDERS_SQL = QLatin1String(R"( + delete from folders where id = ?; + )"); + +const auto SELECT_FOLDERS_FROM_PATH_SQL = QLatin1String(R"( + select id from folders where folder_path = ?; + )"); + +const auto SELECT_FOLDERS_FROM_ID_SQL = QLatin1String(R"( + select folder_path from folders where id = ?; + )"); + +const auto SELECT_ALL_FOLDERPATHS_SQL = QLatin1String(R"( + select folder_path from folders; + )"); + +const auto FOLDERS_SQL = QLatin1String(R"( + create table folders(id integer primary key, folder_path varchar unique); + )"); + +bool addFolderToDB(QSqlQuery &q, const QString &folder_path, int *folder_id) +{ + if (!q.prepare(INSERT_FOLDERS_SQL)) + return false; + q.addBindValue(folder_path); + if (!q.exec()) + return false; + *folder_id = q.lastInsertId().toInt(); + return true; +} + +bool removeFolderFromDB(QSqlQuery &q, int folder_id) { + if (!q.prepare(DELETE_FOLDERS_SQL)) + return false; + q.addBindValue(folder_id); + return q.exec(); +} + +bool selectFolder(QSqlQuery &q, const QString &folder_path, int *id) { + if (!q.prepare(SELECT_FOLDERS_FROM_PATH_SQL)) + return false; + q.addBindValue(folder_path); + if (!q.exec()) + return false; + Q_ASSERT(q.size() < 2); + if (q.next()) + *id = q.value(0).toInt(); + return true; +} + +bool selectFolder(QSqlQuery &q, int id, QString *folder_path) { + if (!q.prepare(SELECT_FOLDERS_FROM_ID_SQL)) + return false; + q.addBindValue(id); + if (!q.exec()) + return false; + Q_ASSERT(q.size() < 2); + if (q.next()) + *folder_path = q.value(0).toString(); + return true; +} + +bool selectAllFolderPaths(QSqlQuery &q, QList *folder_paths) { + if (!q.prepare(SELECT_ALL_FOLDERPATHS_SQL)) + return false; + if (!q.exec()) + return false; + while (q.next()) + 
folder_paths->append(q.value(0).toString()); + return true; +} + +const auto INSERT_DOCUMENTS_SQL = QLatin1String(R"( + insert into documents(folder_id, document_time, document_path) values(?, ?, ?); + )"); + +const auto UPDATE_DOCUMENT_TIME_SQL = QLatin1String(R"( + update documents set document_time = ? where id = ?; + )"); + +const auto DELETE_DOCUMENTS_SQL = QLatin1String(R"( + delete from documents where id = ?; + )"); + +const auto DOCUMENTS_SQL = QLatin1String(R"( + create table documents(id integer primary key, folder_id integer, document_time integer, document_path varchar unique); + )"); + +const auto SELECT_DOCUMENT_SQL = QLatin1String(R"( + select id, document_time from documents where document_path = ?; + )"); + +const auto SELECT_DOCUMENTS_SQL = QLatin1String(R"( + select id from documents where folder_id = ?; + )"); + +const auto SELECT_ALL_DOCUMENTS_SQL = QLatin1String(R"( + select id, document_path from documents; + )"); + +bool addDocument(QSqlQuery &q, int folder_id, qint64 document_time, const QString &document_path, int *document_id) +{ + if (!q.prepare(INSERT_DOCUMENTS_SQL)) + return false; + q.addBindValue(folder_id); + q.addBindValue(document_time); + q.addBindValue(document_path); + if (!q.exec()) + return false; + *document_id = q.lastInsertId().toInt(); + return true; +} + +bool removeDocument(QSqlQuery &q, int document_id) { + if (!q.prepare(DELETE_DOCUMENTS_SQL)) + return false; + q.addBindValue(document_id); + return q.exec(); +} + +bool updateDocument(QSqlQuery &q, int id, qint64 document_time) +{ + if (!q.prepare(UPDATE_DOCUMENT_TIME_SQL)) + return false; + q.addBindValue(id); + q.addBindValue(document_time); + return q.exec(); +} + +bool selectDocument(QSqlQuery &q, const QString &document_path, int *id, qint64 *document_time) { + if (!q.prepare(SELECT_DOCUMENT_SQL)) + return false; + q.addBindValue(document_path); + if (!q.exec()) + return false; + Q_ASSERT(q.size() < 2); + if (q.next()) { + *id = q.value(0).toInt(); + 
*document_time = q.value(1).toLongLong(); + } + return true; +} + +bool selectDocuments(QSqlQuery &q, int folder_id, QList *documentIds) { + if (!q.prepare(SELECT_DOCUMENTS_SQL)) + return false; + q.addBindValue(folder_id); + if (!q.exec()) + return false; + while (q.next()) + documentIds->append(q.value(0).toInt()); + return true; +} + +QSqlError initDb() +{ + QString dbPath = Download::globalInstance()->downloadLocalModelsPath() + + QString("localdocs_v%1.db").arg(LOCALDOCS_VERSION); + QSqlDatabase db = QSqlDatabase::addDatabase("QSQLITE"); + db.setDatabaseName(dbPath); + + if (!db.open()) + return db.lastError(); + + QStringList tables = db.tables(); + if (tables.contains("chunks", Qt::CaseInsensitive)) + return QSqlError(); + + QSqlQuery q; + if (!q.exec(CHUNKS_SQL)) + return q.lastError(); + + if (!q.exec(FTS_CHUNKS_SQL)) + return q.lastError(); + + if (!q.exec(COLLECTIONS_SQL)) + return q.lastError(); + + if (!q.exec(FOLDERS_SQL)) + return q.lastError(); + + if (!q.exec(DOCUMENTS_SQL)) + return q.lastError(); + +#if defined(DEBUG_EXAMPLE) + // Add a folder + QString folder_path = "/example/folder"; + int folder_id; + if (!addFolderToDB(q, folder_path, &folder_id)) { + qDebug() << "Error adding folder:" << q.lastError().text(); + return q.lastError(); + } + + // Add a collection + QString collection_name = "Example Collection"; + if (!addCollection(q, collection_name, folder_id)) { + qDebug() << "Error adding collection:" << q.lastError().text(); + return q.lastError(); + } + + // Add a document + int document_time = 123456789; + int document_id; + QString document_path = "/example/folder/document1.txt"; + if (!addDocument(q, folder_id, document_time, document_path, &document_id)) { + qDebug() << "Error adding document:" << q.lastError().text(); + return q.lastError(); + } + + // Add chunks to the document + QString chunk_text1 = "This is an example chunk."; + QString chunk_text2 = "Another example chunk."; + QString embedding_path = 
"/example/embeddings/embedding1.bin"; + int embedding_id = 1; + + if (!addChunk(q, document_id, 1, chunk_text1, embedding_id, embedding_path) || + !addChunk(q, document_id, 2, chunk_text2, embedding_id, embedding_path)) { + qDebug() << "Error adding chunks:" << q.lastError().text(); + return q.lastError(); + } + + // Perform a search + QList collection_names = {collection_name}; + QString search_text = "example"; + if (!selectChunk(q, collection_names, search_text)) { + qDebug() << "Error selecting chunks:" << q.lastError().text(); + return q.lastError(); + } + + // Print the results + printResults(q); +#endif + + return QSqlError(); +} + +Database::Database() + : QObject(nullptr) + , m_watcher(new QFileSystemWatcher(this)) +{ + moveToThread(&m_dbThread); + connect(&m_dbThread, &QThread::started, this, &Database::start); + m_dbThread.setObjectName("database"); + m_dbThread.start(); +} + +void Database::handleDocumentErrorAndScheduleNext(const QString &errorMessage, + int document_id, const QString &document_path, const QSqlError &error) +{ + qWarning() << errorMessage << document_id << document_path << error.text(); + if (!m_docsToScan.isEmpty()) + QTimer::singleShot(0, this, &Database::scanQueue); +} + +void Database::chunkStream(QTextStream &stream, int document_id) +{ + const int chunkSize = 256; + int chunk_id = 0; + int charCount = 0; + QList words; + + while (!stream.atEnd()) { + QString word; + stream >> word; + charCount += word.length(); + words.append(word); + if (charCount + words.size() - 1 >= chunkSize || stream.atEnd()) { + const QString chunk = words.join(" "); + QSqlQuery q; + if (!addChunk(q, + document_id, + ++chunk_id, + chunk, + 0 /*embedding_id*/, + QString() /*embedding_path*/ + )) { + qWarning() << "ERROR: Could not insert chunk into db" << q.lastError(); + } + words.clear(); + charCount = 0; + } + } +} + +void Database::scanQueue() +{ + if (m_docsToScan.isEmpty()) + return; + + DocumentInfo info = m_docsToScan.dequeue(); + + // Update info + 
info.doc.stat(); + + // If the doc has since been deleted or no longer readable, then we schedule more work and return + // leaving the cleanup for the cleanup handler + if (!info.doc.exists() || !info.doc.isReadable()) { + if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue); + return; + } + + const int folder_id = info.folder; + const qint64 document_time = info.doc.fileTime(QFile::FileModificationTime).toMSecsSinceEpoch(); + const QString document_path = info.doc.canonicalFilePath(); + +#if defined(DEBUG) + qDebug() << "scanning document" << document_path; +#endif + + // Check and see if we already have this document + QSqlQuery q; + int existing_id = -1; + qint64 existing_time = -1; + if (!selectDocument(q, document_path, &existing_id, &existing_time)) { + return handleDocumentErrorAndScheduleNext("ERROR: Cannot select document", + existing_id, document_path, q.lastError()); + } + + // If we have the document, we need to compare the last modification time and if it is newer + // we must rescan the document, otherwise return + if (existing_id != -1) { + Q_ASSERT(existing_time != -1); + if (document_time == existing_time) { + // No need to rescan, but we do have to schedule next + if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue); + return; + } else { + if (!removeChunksByDocumentId(q, existing_id)) { + return handleDocumentErrorAndScheduleNext("ERROR: Cannot remove chunks of document", + existing_id, document_path, q.lastError()); + } + } + } + + // Update the document_time for an existing document, or add it for the first time now + int document_id = existing_id; + if (document_id != -1) { + if (!updateDocument(q, document_id, document_time)) { + return handleDocumentErrorAndScheduleNext("ERROR: Could not update document_time", + document_id, document_path, q.lastError()); + } + } else { + if (!addDocument(q, folder_id, document_time, document_path, &document_id)) { + return 
handleDocumentErrorAndScheduleNext("ERROR: Could not add document", + document_id, document_path, q.lastError()); + } + } + + QElapsedTimer timer; + timer.start(); + + QSqlDatabase::database().transaction(); + Q_ASSERT(document_id != -1); + if (info.doc.suffix() == QLatin1String("pdf")) { + QPdfDocument doc; + if (QPdfDocument::Error::None != doc.load(info.doc.canonicalFilePath())) { + return handleDocumentErrorAndScheduleNext("ERROR: Could not load pdf", + document_id, document_path, q.lastError()); + return; + } + QString text; + for (int i = 0; i < doc.pageCount(); ++i) { + const QPdfSelection selection = doc.getAllText(i); + text.append(selection.text()); + } + QTextStream stream(&text); + chunkStream(stream, document_id); + } else { + QFile file(document_path); + if (!file.open( QIODevice::ReadOnly)) { + return handleDocumentErrorAndScheduleNext("ERROR: Cannot open file for scanning", + existing_id, document_path, q.lastError()); + } + QTextStream stream(&file); + chunkStream(stream, document_id); + file.close(); + } + QSqlDatabase::database().commit(); + +#if defined(DEBUG) + qDebug() << "chunking" << document_path << "took" << timer.elapsed() << "ms"; +#endif + + if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue); +} + +void Database::scanDocuments(int folder_id, const QString &folder_path) +{ +#if defined(DEBUG) + qDebug() << "scanning folder for documents" << folder_path; +#endif + + static const QList extensions { "txt", "doc", "docx", "pdf", "rtf", "odt", "html", "htm", + "xls", "xlsx", "csv", "ods", "ppt", "pptx", "odp", "xml", "json", "log", "md", "tex", "asc", "wks", + "wpd", "wps", "wri", "xhtml", "xht", "xslt", "yaml", "yml", "dtd", "sgml", "tsv", "strings", "resx", + "plist", "properties", "ini", "config", "bat", "sh", "ps1", "cmd", "awk", "sed", "vbs", "ics", "mht", + "mhtml", "epub", "djvu", "azw", "azw3", "mobi", "fb2", "prc", "lit", "lrf", "tcr", "pdb", "oxps", + "xps", "pages", "numbers", "key", "keynote", "abw", 
"zabw", "123", "wk1", "wk3", "wk4", "wk5", "wq1", + "wq2", "xlw", "xlr", "dif", "slk", "sylk", "wb1", "wb2", "wb3", "qpw", "wdb", "wks", "wku", "wr1", + "wrk", "xlk", "xlt", "xltm", "xltx", "xlsm", "xla", "xlam", "xll", "xld", "xlv", "xlw", "xlc", "xlm", + "xlt", "xln" }; + + QDir dir(folder_path); + Q_ASSERT(dir.exists()); + Q_ASSERT(dir.isReadable()); + QDirIterator it(folder_path, QDir::Readable | QDir::Files, QDirIterator::Subdirectories); + while (it.hasNext()) { + it.next(); + QFileInfo fileInfo = it.fileInfo(); + if (fileInfo.isDir()) { + addFolderToWatch(fileInfo.canonicalFilePath()); + continue; + } + + if (!extensions.contains(fileInfo.suffix())) + continue; + + DocumentInfo info; + info.folder = folder_id; + info.doc = fileInfo; + m_docsToScan.enqueue(info); + } + emit docsToScanChanged(); +} + +void Database::start() +{ + connect(m_watcher, &QFileSystemWatcher::directoryChanged, this, &Database::directoryChanged); + connect(this, &Database::docsToScanChanged, this, &Database::scanQueue); + if (!QSqlDatabase::drivers().contains("QSQLITE")) { + qWarning() << "ERROR: missing sqllite driver"; + } else { + QSqlError err = initDb(); + if (err.type() != QSqlError::NoError) + qWarning() << "ERROR: initializing db" << err.text(); + } + addCurrentFolders(); +} + +void Database::addCurrentFolders() +{ +#if defined(DEBUG) + qDebug() << "addCurrentFolders"; +#endif + + QSqlQuery q; + QList collections; + if (!selectAllFromCollections(q, &collections)) { + qWarning() << "ERROR: Cannot select collections" << q.lastError(); + return; + } + + for (auto i : collections) + addFolder(i.collection, i.folder_path); +} + +void Database::updateCollectionList() +{ +#if defined(DEBUG) + qDebug() << "updateCollectionList"; +#endif + + QSqlQuery q; + QList collections; + if (!selectAllFromCollections(q, &collections)) { + qWarning() << "ERROR: Cannot select collections" << q.lastError(); + return; + } + emit collectionListUpdated(collections); +} + +void Database::addFolder(const 
QString &collection, const QString &path) +{ + QFileInfo info(path); + if (!info.exists() || !info.isReadable()) { + qWarning() << "ERROR: Cannot add folder that doesn't exist or not readable" << path; + return; + } + + QSqlQuery q; + int folder_id = -1; + + // See if the folder exists in the db + if (!selectFolder(q, path, &folder_id)) { + qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError(); + return; + } + + // Add the folder + if (folder_id == -1 && !addFolderToDB(q, path, &folder_id)) { + qWarning() << "ERROR: Cannot add folder to db with path" << path << q.lastError(); + return; + } + + Q_ASSERT(folder_id != -1); + + // See if the folder has already been added to the collection + QList folders; + if (!selectFoldersFromCollection(q, collection, &folders)) { + qWarning() << "ERROR: Cannot select folders from collections" << collection << q.lastError(); + return; + } + + if (!folders.contains(folder_id) && !addCollection(q, collection, folder_id)) { + qWarning() << "ERROR: Cannot add folder to collection" << collection << path << q.lastError(); + return; + } + + if (!addFolderToWatch(path)) + return; + + scanDocuments(folder_id, path); + updateCollectionList(); +} + +void Database::removeFolder(const QString &collection, const QString &path) +{ +#if defined(DEBUG) + qDebug() << "removeFolder" << path; +#endif + + QSqlQuery q; + int folder_id = -1; + + // See if the folder exists in the db + if (!selectFolder(q, path, &folder_id)) { + qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError(); + return; + } + + // If we don't have a folder_id in the db, then something bad has happened + Q_ASSERT(folder_id != -1); + if (folder_id == -1) { + qWarning() << "ERROR: Collected folder does not exist in db" << path; + m_watcher->removePath(path); + return; + } + + removeFolderInternal(collection, folder_id, path); +} + +void Database::removeFolderInternal(const QString &collection, int folder_id, const QString &path) +{ + // 
Determine if the folder is used by more than one collection + QSqlQuery q; + QList collections; + if (!selectCollectionsFromFolder(q, folder_id, &collections)) { + qWarning() << "ERROR: Cannot select collections from folder" << folder_id << q.lastError(); + return; + } + + // Remove it from the collections + if (!removeCollection(q, collection, folder_id)) { + qWarning() << "ERROR: Cannot remove collection" << collection << folder_id << q.lastError(); + return; + } + + // If the folder is associated with more than one collection, then return + if (collections.count() > 1) + return; + + // First remove all upcoming jobs associated with this folder by performing an opt-in filter + QQueue docsToScan; + for (DocumentInfo info : m_docsToScan) { + if (info.folder == folder_id) + continue; + docsToScan.append(info); + } + m_docsToScan = docsToScan; + emit docsToScanChanged(); + + // Get a list of all documents associated with folder + QList documentIds; + if (!selectDocuments(q, folder_id, &documentIds)) { + qWarning() << "ERROR: Cannot select documents" << folder_id << q.lastError(); + return; + } + + // Remove all chunks and documents associated with this folder + for (int document_id : documentIds) { + if (!removeChunksByDocumentId(q, document_id)) { + qWarning() << "ERROR: Cannot remove chunks of document_id" << document_id << q.lastError(); + return; + } + + if (!removeDocument(q, document_id)) { + qWarning() << "ERROR: Cannot remove document_id" << document_id << q.lastError(); + return; + } + } + + if (!removeFolderFromDB(q, folder_id)) { + qWarning() << "ERROR: Cannot remove folder_id" << folder_id << q.lastError(); + return; + } + + removeFolderFromWatch(path); + updateCollectionList(); +} + +bool Database::addFolderToWatch(const QString &path) +{ +#if defined(DEBUG) + qDebug() << "addFolderToWatch" << path; +#endif + + if (!m_watcher->addPath(path)) { + qWarning() << "ERROR: Cannot add path to file watcher:" << path; + return false; + } + return true; +} + +bool 
Database::removeFolderFromWatch(const QString &path) +{ +#if defined(DEBUG) + qDebug() << "removeFolderFromWatch" << path; +#endif + if (!m_watcher->removePath(path)) { + qWarning() << "ERROR: Cannot remove path from file watcher:" << path; + return false; + } + return true; +} + +void Database::retrieveFromDB(const QList &collections, const QString &text) +{ +#if defined(DEBUG) + qDebug() << "retrieveFromDB" << collections << text; +#endif + + QSqlQuery q; + if (!selectChunk(q, collections, text)) { + qDebug() << "ERROR: selecting chunks:" << q.lastError().text(); + return; + } + + QList results; + while (q.next()) { + int rowid = q.value(0).toInt(); + QString collection_name = q.value(1).toString(); + QString chunk_text = q.value(2).toString(); + results.append(chunk_text); +#if defined(DEBUG) + qDebug() << "retrieve rowid:" << rowid + << "collection_name:" << collection_name + << "chunk_text:" << chunk_text; +#endif + } + + emit retrieveResult(results); +} + +void Database::cleanDB() +{ +#if defined(DEBUG) + qDebug() << "cleanDB"; +#endif + + // Scan all folders in db to make sure they still exist + QSqlQuery q; + QList collections; + if (!selectAllFromCollections(q, &collections)) { + qWarning() << "ERROR: Cannot select collections" << q.lastError(); + return; + } + + for (auto i : collections) { + // Find the path for the folder + QFileInfo info(i.folder_path); + if (!info.exists() || !info.isReadable()) { +#if defined(DEBUG) + qDebug() << "clean db removing folder" << i.folder_id << i.folder_path; +#endif + removeFolderInternal(i.collection, i.folder_id, i.folder_path); + } + } + + // Scan all documents in db to make sure they still exist + if (!q.prepare(SELECT_ALL_DOCUMENTS_SQL)) { + qWarning() << "ERROR: Cannot prepare sql for select all documents" << q.lastError(); + return; + } + + if (!q.exec()) { + qWarning() << "ERROR: Cannot exec sql for select all documents" << q.lastError(); + return; + } + + while (q.next()) { + int document_id = 
q.value(0).toInt(); + QString document_path = q.value(1).toString(); + QFileInfo info(document_path); + if (info.exists() && info.isReadable()) + continue; + +#if defined(DEBUG) + qDebug() << "clean db removing document" << document_id << document_path; +#endif + + // Remove all chunks and documents that either don't exist or have become unreadable + QSqlQuery query; + if (!removeChunksByDocumentId(query, document_id)) { + qWarning() << "ERROR: Cannot remove chunks of document_id" << document_id << query.lastError(); + } + + if (!removeDocument(query, document_id)) { + qWarning() << "ERROR: Cannot remove document_id" << document_id << query.lastError(); + } + } + updateCollectionList(); +} + +void Database::directoryChanged(const QString &path) +{ +#if defined(DEBUG) + qDebug() << "directoryChanged" << path; +#endif + + QSqlQuery q; + int folder_id = -1; + + // Lookup the folder_id in the db + if (!selectFolder(q, path, &folder_id)) { + qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError(); + return; + } + + // If we don't have a folder_id in the db, then something bad has happened + Q_ASSERT(folder_id != -1); + if (folder_id == -1) { + qWarning() << "ERROR: Watched folder does not exist in db" << path; + m_watcher->removePath(path); + return; + } + + // Clean the database + cleanDB(); + + // Rescan the documents associated with the folder + scanDocuments(folder_id, path); +} diff --git a/gpt4all-chat/database.h b/gpt4all-chat/database.h new file mode 100644 index 00000000..0e13ee98 --- /dev/null +++ b/gpt4all-chat/database.h @@ -0,0 +1,64 @@ +#ifndef DATABASE_H +#define DATABASE_H + +#include +#include +#include +#include +#include +#include + +struct DocumentInfo +{ + int folder; + QFileInfo doc; +}; + +struct CollectionItem { + QString collection; + QString folder_path; + int folder_id = -1; +}; +Q_DECLARE_METATYPE(CollectionItem) + +class Database : public QObject +{ + Q_OBJECT +public: + Database(); + +public Q_SLOTS: + void 
scanQueue(); + void scanDocuments(int folder_id, const QString &folder_path); + void addFolder(const QString &collection, const QString &path); + void removeFolder(const QString &collection, const QString &path); + void retrieveFromDB(const QList &collections, const QString &text); + void cleanDB(); + +Q_SIGNALS: + void docsToScanChanged(); + void retrieveResult(const QList &result); + void collectionListUpdated(const QList &collectionList); + +private Q_SLOTS: + void start(); + void directoryChanged(const QString &path); + bool addFolderToWatch(const QString &path); + bool removeFolderFromWatch(const QString &path); + void addCurrentFolders(); + void updateCollectionList(); + +private: + void removeFolderInternal(const QString &collection, int folder_id, const QString &path); + void chunkStream(QTextStream &stream, int document_id); + void handleDocumentErrorAndScheduleNext(const QString &errorMessage, + int document_id, const QString &document_path, const QSqlError &error); + +private: + QQueue m_docsToScan; + QList m_retrieve; + QThread m_dbThread; + QFileSystemWatcher *m_watcher; +}; + +#endif // DATABASE_H diff --git a/gpt4all-chat/localdocs.cpp b/gpt4all-chat/localdocs.cpp index 13648dc2..b340b9c4 100644 --- a/gpt4all-chat/localdocs.cpp +++ b/gpt4all-chat/localdocs.cpp @@ -1,1015 +1,4 @@ #include "localdocs.h" -#include "download.h" - -#include -#include - -//#define DEBUG -//#define DEBUG_EXAMPLE - -#define LOCALDOCS_VERSION 0 - -const auto INSERT_CHUNK_SQL = QLatin1String(R"( - insert into chunks(document_id, chunk_id, chunk_text, - embedding_id, embedding_path) values(?, ?, ?, ?, ?); - )"); - -const auto INSERT_CHUNK_FTS_SQL = QLatin1String(R"( - insert into chunks_fts(document_id, chunk_id, chunk_text, - embedding_id, embedding_path) values(?, ?, ?, ?, ?); - )"); - -const auto DELETE_CHUNKS_SQL = QLatin1String(R"( - delete from chunks WHERE document_id = ?; - )"); - -const auto DELETE_CHUNKS_FTS_SQL = QLatin1String(R"( - delete from chunks_fts WHERE 
document_id = ?; - )"); - -const auto CHUNKS_SQL = QLatin1String(R"( - create table chunks(document_id integer, chunk_id integer, chunk_text varchar, - embedding_id integer, embedding_path varchar); - )"); - -const auto FTS_CHUNKS_SQL = QLatin1String(R"( - create virtual table chunks_fts using fts5(document_id unindexed, chunk_id unindexed, chunk_text, - embedding_id unindexed, embedding_path unindexed, tokenize="trigram"); - )"); - -const auto SELECT_SQL = QLatin1String(R"( - select chunks_fts.rowid, chunks_fts.document_id, chunks_fts.chunk_text - from chunks_fts - join documents ON chunks_fts.document_id = documents.id - join folders ON documents.folder_id = folders.id - join collections ON folders.id = collections.folder_id - where chunks_fts match ? and collections.collection_name in (%1) - order by bm25(chunks_fts) desc - limit 3; - )"); - -bool addChunk(QSqlQuery &q, int document_id, int chunk_id, const QString &chunk_text, int embedding_id, - const QString &embedding_path) -{ - { - if (!q.prepare(INSERT_CHUNK_SQL)) - return false; - q.addBindValue(document_id); - q.addBindValue(chunk_id); - q.addBindValue(chunk_text); - q.addBindValue(embedding_id); - q.addBindValue(embedding_path); - if (!q.exec()) - return false; - } - { - if (!q.prepare(INSERT_CHUNK_FTS_SQL)) - return false; - q.addBindValue(document_id); - q.addBindValue(chunk_id); - q.addBindValue(chunk_text); - q.addBindValue(embedding_id); - q.addBindValue(embedding_path); - if (!q.exec()) - return false; - } - return true; -} - -bool removeChunksByDocumentId(QSqlQuery &q, int document_id) -{ - { - if (!q.prepare(DELETE_CHUNKS_SQL)) - return false; - q.addBindValue(document_id); - if (!q.exec()) - return false; - } - - { - if (!q.prepare(DELETE_CHUNKS_FTS_SQL)) - return false; - q.addBindValue(document_id); - if (!q.exec()) - return false; - } - - return true; -} - -QStringList generateGrams(const QString &input, int N) -{ - // Remove common English punctuation using QRegularExpression - 
QRegularExpression punctuation(R"([.,;:!?'"()\-])"); - QString cleanedInput = input; - cleanedInput = cleanedInput.remove(punctuation); - - // Split the cleaned input into words using whitespace - QStringList words = cleanedInput.split(QRegularExpression("\\s+"), Qt::SkipEmptyParts); - N = qMin(words.size(), N); - - // Generate all possible N-grams - QStringList ngrams; - for (int i = 0; i < words.size() - (N - 1); ++i) { - QStringList currentNgram; - for (int j = 0; j < N; ++j) { - currentNgram.append("\"" + words[i + j] + "\""); - } - ngrams.append("NEAR(" + currentNgram.join(" ") + ", " + QString::number(N) + ")"); - } - return ngrams; -} - -bool selectChunk(QSqlQuery &q, const QList &collection_names, const QString &chunk_text) -{ - const int N_WORDS = chunk_text.split(QRegularExpression("\\s+")).size(); - for (int N = N_WORDS; N > 2; N--) { - // first try trigrams - QList text = generateGrams(chunk_text, N); - QString orText = text.join(" OR "); - const QString collection_names_str = collection_names.join("', '"); - const QString formatted_query = SELECT_SQL.arg("'" + collection_names_str + "'"); - if (!q.prepare(formatted_query)) - return false; - q.addBindValue(orText); - bool success = q.exec(); - if (!success) return false; - if (q.next()) { -#if defined(DEBUG) - qDebug() << "hit on" << N << "before" << chunk_text << "after" << orText; -#endif - q.previous(); - return true; - } - } - return true; -} - -void printResults(QSqlQuery &q) -{ - while (q.next()) { - int rowid = q.value(0).toInt(); - QString collection_name = q.value(1).toString(); - QString chunk_text = q.value(2).toString(); - - qDebug() << "rowid:" << rowid - << "collection_name:" << collection_name - << "chunk_text:" << chunk_text; - } -} - -const auto INSERT_COLLECTION_SQL = QLatin1String(R"( - insert into collections(collection_name, folder_id) values(?, ?); - )"); - -const auto DELETE_COLLECTION_SQL = QLatin1String(R"( - delete from collections where collection_name = ? 
and folder_id = ?; - )"); - -const auto COLLECTIONS_SQL = QLatin1String(R"( - create table collections(collection_name varchar, folder_id integer, unique(collection_name, folder_id)); - )"); - -const auto SELECT_FOLDERS_FROM_COLLECTIONS_SQL = QLatin1String(R"( - select folder_id from collections where collection_name = ?; - )"); - -const auto SELECT_COLLECTIONS_FROM_FOLDER_SQL = QLatin1String(R"( - select collection_name from collections where folder_id = ?; - )"); - -const auto SELECT_COLLECTIONS_SQL = QLatin1String(R"( - select c.collection_name, f.folder_path, f.id - from collections c - join folders f on c.folder_id = f.id - order by c.collection_name asc, f.folder_path asc; - )"); - -bool addCollection(QSqlQuery &q, const QString &collection_name, int folder_id) -{ - if (!q.prepare(INSERT_COLLECTION_SQL)) - return false; - q.addBindValue(collection_name); - q.addBindValue(folder_id); - return q.exec(); -} - -bool removeCollection(QSqlQuery &q, const QString &collection_name, int folder_id) -{ - if (!q.prepare(DELETE_COLLECTION_SQL)) - return false; - q.addBindValue(collection_name); - q.addBindValue(folder_id); - return q.exec(); -} - -bool selectFoldersFromCollection(QSqlQuery &q, const QString &collection_name, QList *folderIds) { - if (!q.prepare(SELECT_FOLDERS_FROM_COLLECTIONS_SQL)) - return false; - q.addBindValue(collection_name); - if (!q.exec()) - return false; - while (q.next()) - folderIds->append(q.value(0).toInt()); - return true; -} - -bool selectCollectionsFromFolder(QSqlQuery &q, int folder_id, QList *collections) { - if (!q.prepare(SELECT_COLLECTIONS_FROM_FOLDER_SQL)) - return false; - q.addBindValue(folder_id); - if (!q.exec()) - return false; - while (q.next()) - collections->append(q.value(0).toString()); - return true; -} - -struct CollectionEntry { - QString collection; - QString folder_path; - int folder_id = -1; -}; - -bool selectAllFromCollections(QSqlQuery &q, QList *collections) { - if (!q.prepare(SELECT_COLLECTIONS_SQL)) - return 
false; - if (!q.exec()) - return false; - while (q.next()) { - CollectionEntry e; - e.collection = q.value(0).toString(); - e.folder_path = q.value(1).toString(); - e.folder_id = q.value(0).toInt(); - collections->append(e); - } - return true; -} - -const auto INSERT_FOLDERS_SQL = QLatin1String(R"( - insert into folders(folder_path) values(?); - )"); - -const auto DELETE_FOLDERS_SQL = QLatin1String(R"( - delete from folders where id = ?; - )"); - -const auto SELECT_FOLDERS_FROM_PATH_SQL = QLatin1String(R"( - select id from folders where folder_path = ?; - )"); - -const auto SELECT_FOLDERS_FROM_ID_SQL = QLatin1String(R"( - select folder_path from folders where id = ?; - )"); - -const auto SELECT_ALL_FOLDERPATHS_SQL = QLatin1String(R"( - select folder_path from folders; - )"); - -const auto FOLDERS_SQL = QLatin1String(R"( - create table folders(id integer primary key, folder_path varchar unique); - )"); - -bool addFolderToDB(QSqlQuery &q, const QString &folder_path, int *folder_id) -{ - if (!q.prepare(INSERT_FOLDERS_SQL)) - return false; - q.addBindValue(folder_path); - if (!q.exec()) - return false; - *folder_id = q.lastInsertId().toInt(); - return true; -} - -bool removeFolderFromDB(QSqlQuery &q, int folder_id) { - if (!q.prepare(DELETE_FOLDERS_SQL)) - return false; - q.addBindValue(folder_id); - return q.exec(); -} - -bool selectFolder(QSqlQuery &q, const QString &folder_path, int *id) { - if (!q.prepare(SELECT_FOLDERS_FROM_PATH_SQL)) - return false; - q.addBindValue(folder_path); - if (!q.exec()) - return false; - Q_ASSERT(q.size() < 2); - if (q.next()) - *id = q.value(0).toInt(); - return true; -} - -bool selectFolder(QSqlQuery &q, int id, QString *folder_path) { - if (!q.prepare(SELECT_FOLDERS_FROM_ID_SQL)) - return false; - q.addBindValue(id); - if (!q.exec()) - return false; - Q_ASSERT(q.size() < 2); - if (q.next()) - *folder_path = q.value(0).toString(); - return true; -} - -bool selectAllFolderPaths(QSqlQuery &q, QList *folder_paths) { - if 
(!q.prepare(SELECT_ALL_FOLDERPATHS_SQL)) - return false; - if (!q.exec()) - return false; - while (q.next()) - folder_paths->append(q.value(0).toString()); - return true; -} - -const auto INSERT_DOCUMENTS_SQL = QLatin1String(R"( - insert into documents(folder_id, document_time, document_path) values(?, ?, ?); - )"); - -const auto UPDATE_DOCUMENT_TIME_SQL = QLatin1String(R"( - update documents set document_time = ? where id = ?; - )"); - -const auto DELETE_DOCUMENTS_SQL = QLatin1String(R"( - delete from documents where id = ?; - )"); - -const auto DOCUMENTS_SQL = QLatin1String(R"( - create table documents(id integer primary key, folder_id integer, document_time integer, document_path varchar unique); - )"); - -const auto SELECT_DOCUMENT_SQL = QLatin1String(R"( - select id, document_time from documents where document_path = ?; - )"); - -const auto SELECT_DOCUMENTS_SQL = QLatin1String(R"( - select id from documents where folder_id = ?; - )"); - -const auto SELECT_ALL_DOCUMENTS_SQL = QLatin1String(R"( - select id, document_path from documents; - )"); - -bool addDocument(QSqlQuery &q, int folder_id, qint64 document_time, const QString &document_path, int *document_id) -{ - if (!q.prepare(INSERT_DOCUMENTS_SQL)) - return false; - q.addBindValue(folder_id); - q.addBindValue(document_time); - q.addBindValue(document_path); - if (!q.exec()) - return false; - *document_id = q.lastInsertId().toInt(); - return true; -} - -bool removeDocument(QSqlQuery &q, int document_id) { - if (!q.prepare(DELETE_DOCUMENTS_SQL)) - return false; - q.addBindValue(document_id); - return q.exec(); -} - -bool updateDocument(QSqlQuery &q, int id, qint64 document_time) -{ - if (!q.prepare(UPDATE_DOCUMENT_TIME_SQL)) - return false; - q.addBindValue(id); - q.addBindValue(document_time); - return q.exec(); -} - -bool selectDocument(QSqlQuery &q, const QString &document_path, int *id, qint64 *document_time) { - if (!q.prepare(SELECT_DOCUMENT_SQL)) - return false; - q.addBindValue(document_path); - if 
(!q.exec()) - return false; - Q_ASSERT(q.size() < 2); - if (q.next()) { - *id = q.value(0).toInt(); - *document_time = q.value(1).toLongLong(); - } - return true; -} - -bool selectDocuments(QSqlQuery &q, int folder_id, QList *documentIds) { - if (!q.prepare(SELECT_DOCUMENTS_SQL)) - return false; - q.addBindValue(folder_id); - if (!q.exec()) - return false; - while (q.next()) - documentIds->append(q.value(0).toInt()); - return true; -} - -QSqlError initDb() -{ - QString dbPath = Download::globalInstance()->downloadLocalModelsPath() - + QString("localdocs_v%1.db").arg(LOCALDOCS_VERSION); - QSqlDatabase db = QSqlDatabase::addDatabase("QSQLITE"); - db.setDatabaseName(dbPath); - - if (!db.open()) - return db.lastError(); - - QStringList tables = db.tables(); - if (tables.contains("chunks", Qt::CaseInsensitive)) - return QSqlError(); - - QSqlQuery q; - if (!q.exec(CHUNKS_SQL)) - return q.lastError(); - - if (!q.exec(FTS_CHUNKS_SQL)) - return q.lastError(); - - if (!q.exec(COLLECTIONS_SQL)) - return q.lastError(); - - if (!q.exec(FOLDERS_SQL)) - return q.lastError(); - - if (!q.exec(DOCUMENTS_SQL)) - return q.lastError(); - -#if defined(DEBUG_EXAMPLE) - // Add a folder - QString folder_path = "/example/folder"; - int folder_id; - if (!addFolderToDB(q, folder_path, &folder_id)) { - qDebug() << "Error adding folder:" << q.lastError().text(); - return q.lastError(); - } - - // Add a collection - QString collection_name = "Example Collection"; - if (!addCollection(q, collection_name, folder_id)) { - qDebug() << "Error adding collection:" << q.lastError().text(); - return q.lastError(); - } - - // Add a document - int document_time = 123456789; - int document_id; - QString document_path = "/example/folder/document1.txt"; - if (!addDocument(q, folder_id, document_time, document_path, &document_id)) { - qDebug() << "Error adding document:" << q.lastError().text(); - return q.lastError(); - } - - // Add chunks to the document - QString chunk_text1 = "This is an example chunk."; - 
QString chunk_text2 = "Another example chunk."; - QString embedding_path = "/example/embeddings/embedding1.bin"; - int embedding_id = 1; - - if (!addChunk(q, document_id, 1, chunk_text1, embedding_id, embedding_path) || - !addChunk(q, document_id, 2, chunk_text2, embedding_id, embedding_path)) { - qDebug() << "Error adding chunks:" << q.lastError().text(); - return q.lastError(); - } - - // Perform a search - QList collection_names = {collection_name}; - QString search_text = "example"; - if (!selectChunk(q, collection_names, search_text)) { - qDebug() << "Error selecting chunks:" << q.lastError().text(); - return q.lastError(); - } - - // Print the results - printResults(q); -#endif - - return QSqlError(); -} - -Database::Database() - : QObject(nullptr) - , m_watcher(new QFileSystemWatcher(this)) -{ - moveToThread(&m_dbThread); - connect(&m_dbThread, &QThread::started, this, &Database::start); - m_dbThread.setObjectName("database"); - m_dbThread.start(); -} - -void Database::handleDocumentErrorAndScheduleNext(const QString &errorMessage, - int document_id, const QString &document_path, const QSqlError &error) -{ - qWarning() << errorMessage << document_id << document_path << error.text(); - if (!m_docsToScan.isEmpty()) - QTimer::singleShot(0, this, &Database::scanQueue); -} - -void Database::chunkStream(QTextStream &stream, int document_id) -{ - const int chunkSize = 256; - int chunk_id = 0; - int charCount = 0; - QList words; - - while (!stream.atEnd()) { - QString word; - stream >> word; - charCount += word.length(); - words.append(word); - if (charCount + words.size() - 1 >= chunkSize || stream.atEnd()) { - const QString chunk = words.join(" "); - QSqlQuery q; - if (!addChunk(q, - document_id, - ++chunk_id, - chunk, - 0 /*embedding_id*/, - QString() /*embedding_path*/ - )) { - qWarning() << "ERROR: Could not insert chunk into db" << q.lastError(); - } - words.clear(); - charCount = 0; - } - } -} - -void Database::scanQueue() -{ - if (m_docsToScan.isEmpty()) - 
return; - - DocumentInfo info = m_docsToScan.dequeue(); - - // Update info - info.doc.stat(); - - // If the doc has since been deleted or no longer readable, then we schedule more work and return - // leaving the cleanup for the cleanup handler - if (!info.doc.exists() || !info.doc.isReadable()) { - if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue); - return; - } - - const int folder_id = info.folder; - const qint64 document_time = info.doc.fileTime(QFile::FileModificationTime).toMSecsSinceEpoch(); - const QString document_path = info.doc.canonicalFilePath(); - -#if defined(DEBUG) - qDebug() << "scanning document" << document_path; -#endif - - // Check and see if we already have this document - QSqlQuery q; - int existing_id = -1; - qint64 existing_time = -1; - if (!selectDocument(q, document_path, &existing_id, &existing_time)) { - return handleDocumentErrorAndScheduleNext("ERROR: Cannot select document", - existing_id, document_path, q.lastError()); - } - - // If we have the document, we need to compare the last modification time and if it is newer - // we must rescan the document, otherwise return - if (existing_id != -1) { - Q_ASSERT(existing_time != -1); - if (document_time == existing_time) { - // No need to rescan, but we do have to schedule next - if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue); - return; - } else { - if (!removeChunksByDocumentId(q, existing_id)) { - return handleDocumentErrorAndScheduleNext("ERROR: Cannot remove chunks of document", - existing_id, document_path, q.lastError()); - } - } - } - - // Update the document_time for an existing document, or add it for the first time now - int document_id = existing_id; - if (document_id != -1) { - if (!updateDocument(q, document_id, document_time)) { - return handleDocumentErrorAndScheduleNext("ERROR: Could not update document_time", - document_id, document_path, q.lastError()); - } - } else { - if (!addDocument(q, folder_id, 
document_time, document_path, &document_id)) { - return handleDocumentErrorAndScheduleNext("ERROR: Could not add document", - document_id, document_path, q.lastError()); - } - } - - QElapsedTimer timer; - timer.start(); - - QSqlDatabase::database().transaction(); - Q_ASSERT(document_id != -1); - if (info.doc.suffix() == QLatin1String("pdf")) { - QPdfDocument doc; - if (QPdfDocument::Error::None != doc.load(info.doc.canonicalFilePath())) { - return handleDocumentErrorAndScheduleNext("ERROR: Could not load pdf", - document_id, document_path, q.lastError()); - return; - } - QString text; - for (int i = 0; i < doc.pageCount(); ++i) { - const QPdfSelection selection = doc.getAllText(i); - text.append(selection.text()); - } - QTextStream stream(&text); - chunkStream(stream, document_id); - } else { - QFile file(document_path); - if (!file.open( QIODevice::ReadOnly)) { - return handleDocumentErrorAndScheduleNext("ERROR: Cannot open file for scanning", - existing_id, document_path, q.lastError()); - } - QTextStream stream(&file); - chunkStream(stream, document_id); - file.close(); - } - QSqlDatabase::database().commit(); - -#if defined(DEBUG) - qDebug() << "chunking" << document_path << "took" << timer.elapsed() << "ms"; -#endif - - if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue); -} - -void Database::scanDocuments(int folder_id, const QString &folder_path) -{ -#if defined(DEBUG) - qDebug() << "scanning folder for documents" << folder_path; -#endif - - static const QList extensions { "txt", "doc", "docx", "pdf", "rtf", "odt", "html", "htm", - "xls", "xlsx", "csv", "ods", "ppt", "pptx", "odp", "xml", "json", "log", "md", "tex", "asc", "wks", - "wpd", "wps", "wri", "xhtml", "xht", "xslt", "yaml", "yml", "dtd", "sgml", "tsv", "strings", "resx", - "plist", "properties", "ini", "config", "bat", "sh", "ps1", "cmd", "awk", "sed", "vbs", "ics", "mht", - "mhtml", "epub", "djvu", "azw", "azw3", "mobi", "fb2", "prc", "lit", "lrf", "tcr", "pdb", "oxps", 
- "xps", "pages", "numbers", "key", "keynote", "abw", "zabw", "123", "wk1", "wk3", "wk4", "wk5", "wq1", - "wq2", "xlw", "xlr", "dif", "slk", "sylk", "wb1", "wb2", "wb3", "qpw", "wdb", "wks", "wku", "wr1", - "wrk", "xlk", "xlt", "xltm", "xltx", "xlsm", "xla", "xlam", "xll", "xld", "xlv", "xlw", "xlc", "xlm", - "xlt", "xln" }; - - QDir dir(folder_path); - Q_ASSERT(dir.exists()); - Q_ASSERT(dir.isReadable()); - QDirIterator it(folder_path, QDir::Readable | QDir::Files, QDirIterator::Subdirectories); - while (it.hasNext()) { - it.next(); - QFileInfo fileInfo = it.fileInfo(); - if (fileInfo.isDir()) { - addFolderToWatch(fileInfo.canonicalFilePath()); - continue; - } - - if (!extensions.contains(fileInfo.suffix())) - continue; - - DocumentInfo info; - info.folder = folder_id; - info.doc = fileInfo; - m_docsToScan.enqueue(info); - } - emit docsToScanChanged(); -} - -void Database::start() -{ - connect(m_watcher, &QFileSystemWatcher::directoryChanged, this, &Database::directoryChanged); - connect(this, &Database::docsToScanChanged, this, &Database::scanQueue); - if (!QSqlDatabase::drivers().contains("QSQLITE")) { - qWarning() << "ERROR: missing sqllite driver"; - } else { - QSqlError err = initDb(); - if (err.type() != QSqlError::NoError) - qWarning() << "ERROR: initializing db" << err.text(); - } - addCurrentFolders(); -} - -void Database::addCurrentFolders() -{ -#if defined(DEBUG) - qDebug() << "addCurrentFolders"; -#endif - - QSqlQuery q; - QList collections; - if (!selectAllFromCollections(q, &collections)) { - qWarning() << "ERROR: Cannot select collections" << q.lastError(); - return; - } - - for (auto e : collections) - addFolder(e.collection, e.folder_path); -} - -void Database::updateCollectionList() -{ -#if defined(DEBUG) - qDebug() << "updateCollectionList"; -#endif - - QSqlQuery q; - QList collections; - if (!selectAllFromCollections(q, &collections)) { - qWarning() << "ERROR: Cannot select collections" << q.lastError(); - return; - } - - QList collectionList; 
- QString currentCollectionName; - CollectionInfo currentCollectionInfo; - - for (auto e : collections) { - if (e.collection != currentCollectionName) { - if (!currentCollectionInfo.name.isEmpty()) - collectionList.append(currentCollectionInfo); - currentCollectionName = e.collection; - currentCollectionInfo.name = e.collection; - currentCollectionInfo.folders.clear(); - } - currentCollectionInfo.folders.append(e.folder_path); - } - if (!currentCollectionInfo.name.isEmpty()) - collectionList.append(currentCollectionInfo); - - emit collectionListUpdated(collectionList); -} - -void Database::addFolder(const QString &collection, const QString &path) -{ - QFileInfo info(path); - if (!info.exists() || !info.isReadable()) { - qWarning() << "ERROR: Cannot add folder that doesn't exist or not readable" << path; - return; - } - - QSqlQuery q; - int folder_id = -1; - - // See if the folder exists in the db - if (!selectFolder(q, path, &folder_id)) { - qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError(); - return; - } - - // Add the folder - if (folder_id == -1 && !addFolderToDB(q, path, &folder_id)) { - qWarning() << "ERROR: Cannot add folder to db with path" << path << q.lastError(); - return; - } - - Q_ASSERT(folder_id != -1); - - // See if the folder has already been added to the collection - QList folders; - if (!selectFoldersFromCollection(q, collection, &folders)) { - qWarning() << "ERROR: Cannot select folders from collections" << collection << q.lastError(); - return; - } - - if (!folders.contains(folder_id) && !addCollection(q, collection, folder_id)) { - qWarning() << "ERROR: Cannot add folder to collection" << collection << path << q.lastError(); - return; - } - - if (!addFolderToWatch(path)) - return; - - scanDocuments(folder_id, path); - updateCollectionList(); -} - -void Database::removeFolder(const QString &collection, const QString &path) -{ -#if defined(DEBUG) - qDebug() << "removeFolder" << path; -#endif - - QSqlQuery q; - int 
folder_id = -1; - - // See if the folder exists in the db - if (!selectFolder(q, path, &folder_id)) { - qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError(); - return; - } - - // If we don't have a folder_id in the db, then something bad has happened - Q_ASSERT(folder_id != -1); - if (folder_id == -1) { - qWarning() << "ERROR: Collected folder does not exist in db" << path; - m_watcher->removePath(path); - return; - } - - removeFolderInternal(collection, folder_id, path); -} - -void Database::removeFolderInternal(const QString &collection, int folder_id, const QString &path) -{ - // Determine if the folder is used by more than one collection - QSqlQuery q; - QList collections; - if (!selectCollectionsFromFolder(q, folder_id, &collections)) { - qWarning() << "ERROR: Cannot select collections from folder" << folder_id << q.lastError(); - return; - } - - // Remove it from the collections - if (!removeCollection(q, collection, folder_id)) { - qWarning() << "ERROR: Cannot remove collection" << collection << folder_id << q.lastError(); - return; - } - - // If the folder is associated with more than one collection, then return - if (collections.count() > 1) - return; - - // First remove all upcoming jobs associated with this folder by performing an opt-in filter - QQueue docsToScan; - for (DocumentInfo info : m_docsToScan) { - if (info.folder == folder_id) - continue; - docsToScan.append(info); - } - m_docsToScan = docsToScan; - emit docsToScanChanged(); - - // Get a list of all documents associated with folder - QList documentIds; - if (!selectDocuments(q, folder_id, &documentIds)) { - qWarning() << "ERROR: Cannot select documents" << folder_id << q.lastError(); - return; - } - - // Remove all chunks and documents associated with this folder - for (int document_id : documentIds) { - if (!removeChunksByDocumentId(q, document_id)) { - qWarning() << "ERROR: Cannot remove chunks of document_id" << document_id << q.lastError(); - return; - } - - if 
(!removeDocument(q, document_id)) { - qWarning() << "ERROR: Cannot remove document_id" << document_id << q.lastError(); - return; - } - } - - if (!removeFolderFromDB(q, folder_id)) { - qWarning() << "ERROR: Cannot remove folder_id" << folder_id << q.lastError(); - return; - } - - removeFolderFromWatch(path); - updateCollectionList(); -} - -bool Database::addFolderToWatch(const QString &path) -{ -#if defined(DEBUG) - qDebug() << "addFolderToWatch" << path; -#endif - - if (!m_watcher->addPath(path)) { - qWarning() << "ERROR: Cannot add path to file watcher:" << path; - return false; - } - return true; -} - -bool Database::removeFolderFromWatch(const QString &path) -{ -#if defined(DEBUG) - qDebug() << "removeFolderFromWatch" << path; -#endif - if (!m_watcher->removePath(path)) { - qWarning() << "ERROR: Cannot remove path from file watcher:" << path; - return false; - } - return true; -} - -void Database::retrieveFromDB(const QList &collections, const QString &text) -{ -#if defined(DEBUG) - qDebug() << "retrieveFromDB" << collections << text; -#endif - - QSqlQuery q; - if (!selectChunk(q, collections, text)) { - qDebug() << "ERROR: selecting chunks:" << q.lastError().text(); - return; - } - - QList results; - while (q.next()) { - int rowid = q.value(0).toInt(); - QString collection_name = q.value(1).toString(); - QString chunk_text = q.value(2).toString(); - results.append(chunk_text); -#if defined(DEBUG) - qDebug() << "retrieve rowid:" << rowid - << "collection_name:" << collection_name - << "chunk_text:" << chunk_text; -#endif - } - - emit retrieveResult(results); -} - -void Database::cleanDB() -{ -#if defined(DEBUG) - qDebug() << "cleanDB"; -#endif - - // Scan all folders in db to make sure they still exist - QSqlQuery q; - QList collections; - if (!selectAllFromCollections(q, &collections)) { - qWarning() << "ERROR: Cannot select collections" << q.lastError(); - return; - } - - for (auto e : collections) { - // Find the path for the folder - QFileInfo 
info(e.folder_path); - if (!info.exists() || !info.isReadable()) { -#if defined(DEBUG) - qDebug() << "clean db removing folder" << e.folder_id << e.folder_path; -#endif - removeFolderInternal(e.collection, e.folder_id, e.folder_path); - } - } - - // Scan all documents in db to make sure they still exist - if (!q.prepare(SELECT_ALL_DOCUMENTS_SQL)) { - qWarning() << "ERROR: Cannot prepare sql for select all documents" << q.lastError(); - return; - } - - if (!q.exec()) { - qWarning() << "ERROR: Cannot exec sql for select all documents" << q.lastError(); - return; - } - - while (q.next()) { - int document_id = q.value(0).toInt(); - QString document_path = q.value(1).toString(); - QFileInfo info(document_path); - if (info.exists() && info.isReadable()) - continue; - -#if defined(DEBUG) - qDebug() << "clean db removing document" << document_id << document_path; -#endif - - // Remove all chunks and documents that either don't exist or have become unreadable - QSqlQuery query; - if (!removeChunksByDocumentId(query, document_id)) { - qWarning() << "ERROR: Cannot remove chunks of document_id" << document_id << query.lastError(); - } - - if (!removeDocument(query, document_id)) { - qWarning() << "ERROR: Cannot remove document_id" << document_id << query.lastError(); - } - } - updateCollectionList(); -} - -void Database::directoryChanged(const QString &path) -{ -#if defined(DEBUG) - qDebug() << "directoryChanged" << path; -#endif - - QSqlQuery q; - int folder_id = -1; - - // Lookup the folder_id in the db - if (!selectFolder(q, path, &folder_id)) { - qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError(); - return; - } - - // If we don't have a folder_id in the db, then something bad has happened - Q_ASSERT(folder_id != -1); - if (folder_id == -1) { - qWarning() << "ERROR: Watched folder does not exist in db" << path; - m_watcher->removePath(path); - return; - } - - // Clean the database - cleanDB(); - - // Rescan the documents associated with the 
folder - scanDocuments(folder_id, path); -} class MyLocalDocs: public LocalDocs { }; Q_GLOBAL_STATIC(MyLocalDocs, localDocsInstance) @@ -1020,6 +9,7 @@ LocalDocs *LocalDocs::globalInstance() LocalDocs::LocalDocs() : QObject(nullptr) + , m_localDocsModel(new LocalDocsModel(this)) , m_database(new Database) { connect(this, &LocalDocs::requestAddFolder, m_database, @@ -1030,13 +20,18 @@ LocalDocs::LocalDocs() &Database::retrieveFromDB, Qt::QueuedConnection); connect(m_database, &Database::retrieveResult, this, &LocalDocs::handleRetrieveResult, Qt::QueuedConnection); - - addFolder("localdocs", "/home/atreat/dev/large_language_models/localdocs"); + connect(m_database, &Database::collectionListUpdated, + m_localDocsModel, &LocalDocsModel::handleCollectionListUpdated, Qt::QueuedConnection); } void LocalDocs::addFolder(const QString &collection, const QString &path) { - emit requestAddFolder(collection, path); + const QUrl url(path); + if (url.isLocalFile()) { + emit requestAddFolder(collection, url.toLocalFile()); + } else { + emit requestAddFolder(collection, path); + } } void LocalDocs::removeFolder(const QString &collection, const QString &path) @@ -1055,9 +50,3 @@ void LocalDocs::handleRetrieveResult(const QList &result) m_retrieveResult = result; emit receivedResult(); } - -void LocalDocs::handleCollectionListUpdated(const QList &collectionList) -{ - m_collectionList = collectionList; - emit collectionListChanged(); -} diff --git a/gpt4all-chat/localdocs.h b/gpt4all-chat/localdocs.h index af81775c..fae76b4a 100644 --- a/gpt4all-chat/localdocs.h +++ b/gpt4all-chat/localdocs.h @@ -1,80 +1,23 @@ #ifndef LOCALDOCS_H #define LOCALDOCS_H +#include "localdocsmodel.h" +#include "database.h" + #include -#include -#include -#include -#include -#include - -struct DocumentInfo -{ - int folder; - QFileInfo doc; -}; - -struct CollectionInfo { - Q_GADGET - Q_PROPERTY(QString name MEMBER name) -public: - QString name; - QList folders; -}; -Q_DECLARE_METATYPE(CollectionInfo) - -class 
Database : public QObject -{ - Q_OBJECT -public: - Database(); - -public Q_SLOTS: - void scanQueue(); - void scanDocuments(int folder_id, const QString &folder_path); - void addFolder(const QString &collection, const QString &path); - void removeFolder(const QString &collection, const QString &path); - void retrieveFromDB(const QList &collections, const QString &text); - void cleanDB(); - -Q_SIGNALS: - void docsToScanChanged(); - void retrieveResult(const QList &result); - void collectionListUpdated(const QList &collectionList); - -private Q_SLOTS: - void start(); - void directoryChanged(const QString &path); - bool addFolderToWatch(const QString &path); - bool removeFolderFromWatch(const QString &path); - void addCurrentFolders(); - void updateCollectionList(); - -private: - void removeFolderInternal(const QString &collection, int folder_id, const QString &path); - void chunkStream(QTextStream &stream, int document_id); - void handleDocumentErrorAndScheduleNext(const QString &errorMessage, - int document_id, const QString &document_path, const QSqlError &error); - -private: - QQueue m_docsToScan; - QList m_retrieve; - QThread m_dbThread; - QFileSystemWatcher *m_watcher; -}; class LocalDocs : public QObject { Q_OBJECT - Q_PROPERTY(QList collectionList READ collectionList NOTIFY collectionListChanged) + Q_PROPERTY(LocalDocsModel *localDocsModel READ localDocsModel NOTIFY localDocsModelChanged) public: static LocalDocs *globalInstance(); - QList collectionList() const { return m_collectionList; } + LocalDocsModel *localDocsModel() const { return m_localDocsModel; } - void addFolder(const QString &collection, const QString &path); - void removeFolder(const QString &collection, const QString &path); + Q_INVOKABLE void addFolder(const QString &collection, const QString &path); + Q_INVOKABLE void removeFolder(const QString &collection, const QString &path); QList result() const { return m_retrieveResult; } void requestRetrieve(const QList &collections, const QString 
&text);
@@ -84,16 +27,15 @@ Q_SIGNALS:
     void requestRemoveFolder(const QString &collection, const QString &path);
     void requestRetrieveFromDB(const QList<QString> &collections, const QString &text);
     void receivedResult();
-    void collectionListChanged();
+    void localDocsModelChanged();
 
 private Q_SLOTS:
     void handleRetrieveResult(const QList<QString> &result);
-    void handleCollectionListUpdated(const QList<CollectionItem> &collectionList);
 
 private:
+    LocalDocsModel *m_localDocsModel;
     Database *m_database;
     QList<QString> m_retrieveResult;
-    QList<CollectionItem> m_collectionList;
 
 private:
     explicit LocalDocs();
diff --git a/gpt4all-chat/localdocsdb.h b/gpt4all-chat/localdocsdb.h
new file mode 100644
index 00000000..cffb79ce
--- /dev/null
+++ b/gpt4all-chat/localdocsdb.h
@@ -0,0 +1,105 @@
+#ifndef LOCALDOCS_H // NOTE(review): guard name does not match this file's name (localdocsdb.h) - confirm it cannot clash with another header's guard
+#define LOCALDOCS_H
+
+#include "localdocsmodel.h"
+
+#include <QObject> // NOTE(review): the six <...> header names were missing from this copy of the patch; reconstructed from the Qt types used below - confirm
+#include <QtSql>
+#include <QQueue>
+#include <QFileInfo>
+#include <QThread>
+#include <QFileSystemWatcher>
+
+struct DocumentInfo
+{
+    int folder;
+    QFileInfo doc;
+};
+
+struct CollectionItem {
+    QString collection;
+    QString folder_path;
+    int folder_id = -1;
+};
+Q_DECLARE_METATYPE(CollectionItem)
+
+class Database : public QObject // exposes folder scanning, add/remove and retrieval as slots; results are reported through signals
+{
+    Q_OBJECT
+public:
+    Database();
+
+public Q_SLOTS:
+    void scanQueue();
+    void scanDocuments(int folder_id, const QString &folder_path);
+    void addFolder(const QString &collection, const QString &path);
+    void removeFolder(const QString &collection, const QString &path);
+    void retrieveFromDB(const QList<QString> &collections, const QString &text);
+    void cleanDB();
+
+Q_SIGNALS:
+    void docsToScanChanged();
+    void retrieveResult(const QList<QString> &result); // NOTE(review): element type reconstructed as QString - confirm
+    void collectionListUpdated(const QList<CollectionItem> &collectionList);
+
+private Q_SLOTS:
+    void start();
+    void directoryChanged(const QString &path);
+    bool addFolderToWatch(const QString &path);
+    bool removeFolderFromWatch(const QString &path);
+    void addCurrentFolders();
+    void updateCollectionList();
+
+private:
+    void removeFolderInternal(const QString &collection, int folder_id, const QString &path);
+    void chunkStream(QTextStream &stream, int document_id);
+    void handleDocumentErrorAndScheduleNext(const QString &errorMessage,
+        int document_id, const QString &document_path, const QSqlError &error);
+
+private:
+    QQueue<DocumentInfo> m_docsToScan;
+    QList<QString> m_retrieve;
+    QThread m_dbThread;
+    QFileSystemWatcher *m_watcher;
+};
+
+class LocalDocs : public QObject // reached through globalInstance(); exposes localDocsModel as a read-only Q_PROPERTY
+{
+    Q_OBJECT
+    Q_PROPERTY(LocalDocsModel *localDocsModel READ localDocsModel NOTIFY localDocsModelChanged)
+
+public:
+    static LocalDocs *globalInstance();
+
+    LocalDocsModel *localDocsModel() const { return m_localDocsModel; }
+
+    void addFolder(const QString &collection, const QString &path);
+    void removeFolder(const QString &collection, const QString &path);
+
+    QList<QString> result() const { return m_retrieveResult; }
+    void requestRetrieve(const QList<QString> &collections, const QString &text);
+
+Q_SIGNALS:
+    void requestAddFolder(const QString &collection, const QString &path);
+    void requestRemoveFolder(const QString &collection, const QString &path);
+    void requestRetrieveFromDB(const QList<QString> &collections, const QString &text);
+    void receivedResult();
+    void localDocsModelChanged();
+
+private Q_SLOTS:
+    void handleRetrieveResult(const QList<QString> &result);
+    void handleCollectionListUpdated(const QList<CollectionItem> &collectionList); // NOTE(review): the localdocs.h hunk of this same patch removes this slot from LocalDocs - confirm this header should still declare it
+
+private:
+    LocalDocsModel *m_localDocsModel;
+    Database *m_database;
+    QList<QString> m_retrieveResult;
+    QList<CollectionItem> m_collectionList; // NOTE(review): likewise removed from LocalDocs by the localdocs.h hunk - confirm
+
+private:
+    explicit LocalDocs();
+    ~LocalDocs() {}
+    friend class MyLocalDocs;
+};
+
+#endif // LOCALDOCS_H
diff --git a/gpt4all-chat/localdocsmodel.cpp b/gpt4all-chat/localdocsmodel.cpp
new file mode 100644
index 00000000..d6792788
--- /dev/null
+++ b/gpt4all-chat/localdocsmodel.cpp
@@ -0,0 +1,43 @@
+#include "localdocsmodel.h"
+
+LocalDocsModel::LocalDocsModel(QObject *parent)
+    : QAbstractListModel(parent)
+{
+}
+
+int LocalDocsModel::rowCount(const QModelIndex &parent) const
+{
+    Q_UNUSED(parent); // the parent index is ignored; the row count is the collection list size
+    return m_collectionList.size();
+}
+
+QVariant LocalDocsModel::data(const QModelIndex &index, int role) const
+{
+    if (!index.isValid() || index.row() < 0 || index.row() >= m_collectionList.size()) // invalid or out-of-range row yields an empty QVariant
+        return QVariant();
+
+    const CollectionItem &item = m_collectionList.at(index.row()); // bind by reference: QList::at() returns a const reference, so no per-call copy is needed
+    switch (role) {
+        case CollectionRole:
+            return item.collection;
+        case FolderPathRole:
+            return item.folder_path;
+    }
+
+    return QVariant(); // any other role
+}
+
+QHash<int, QByteArray> LocalDocsModel::roleNames() const
+{
+    QHash<int, QByteArray> roles;
+    roles[CollectionRole] = "collection";
+    roles[FolderPathRole] = "folder_path";
+    return roles;
+}
+
+void LocalDocsModel::handleCollectionListUpdated(const QList<CollectionItem> &collectionList)
+{
+    beginResetModel(); // the whole list is swapped inside a model reset
+    m_collectionList = collectionList;
+    endResetModel();
+}
\ No newline at end of file
diff --git a/gpt4all-chat/localdocsmodel.h b/gpt4all-chat/localdocsmodel.h
new file mode 100644
index 00000000..2451537e
--- /dev/null
+++ b/gpt4all-chat/localdocsmodel.h
@@ -0,0 +1,29 @@
+#ifndef LOCALDOCSMODEL_H
+#define LOCALDOCSMODEL_H
+
+#include <QAbstractListModel>
+#include "database.h"
+
+class LocalDocsModel : public QAbstractListModel // list model with one row per CollectionItem
+{
+    Q_OBJECT
+
+public:
+    enum Roles {
+        CollectionRole = Qt::UserRole + 1, // CollectionItem::collection
+        FolderPathRole // CollectionItem::folder_path
+    };
+
+    explicit LocalDocsModel(QObject *parent = nullptr);
+    int rowCount(const QModelIndex & = QModelIndex()) const override;
+    QVariant data(const QModelIndex &index, int role) const override;
+    QHash<int, QByteArray> roleNames() const override;
+
+public Q_SLOTS:
+    void handleCollectionListUpdated(const QList<CollectionItem> &collectionList);
+
+private:
+    QList<CollectionItem> m_collectionList;
+};
+
+#endif // LOCALDOCSMODEL_H
diff --git a/gpt4all-chat/qml/AddCollectionDialog.qml b/gpt4all-chat/qml/AddCollectionDialog.qml
new file mode 100644
index 00000000..c63fbb1b
--- /dev/null
+++ b/gpt4all-chat/qml/AddCollectionDialog.qml
@@ -0,0 +1,99 @@
+import QtCore
+import QtQuick
+import QtQuick.Controls
+import QtQuick.Controls.Basic
+import QtQuick.Dialogs
+import QtQuick.Layouts
+
+Dialog {
+    id: addCollectionDialog
+    anchors.centerIn: parent
+    opacity: 0.9
+    padding: 20
+    modal: true
+
+    Theme {
+        id: theme
+    }
+
+    property string collection: "" // set when the name field finishes editing
+    property string folder_path: "" // set when the folder dialog is accepted
+
+    FolderDialog {
+        id: folderDialog
+        title: "Please choose a directory"
+        currentFolder: StandardPaths.writableLocation(StandardPaths.DocumentsLocation)
+        onAccepted: {
+            addCollectionDialog.folder_path = selectedFolder // NOTE(review): selectedFolder is a url, so this stores its string form - confirm the consumer of folder_path expects that
+        }
+    }
+
+    Row {
+        anchors.top: parent.top
+        anchors.left: parent.left
+        anchors.right: parent.right
+        height: childrenRect.height
+        spacing: 20
+
+        TextField {
+            id: collection
+            implicitWidth: 200
+            horizontalAlignment: Text.AlignJustify
+            color: theme.textColor
+            background: Rectangle {
+                implicitWidth: 150
+                color: theme.backgroundLighter
+                radius: 10
+            }
+            padding: 10
+            placeholderText: qsTr("Collection name...")
+            placeholderTextColor: theme.mutedTextColor
+            ToolTip.text: qsTr("Name of the collection to add (Required)")
+            ToolTip.visible: hovered
+            onEditingFinished: {
+                addCollectionDialog.collection = text
+            }
+            Accessible.role: Accessible.EditableText
+            Accessible.name: collection.text
+            Accessible.description: ToolTip.text
+        }
+
+        MyTextField {
+            id: folderLabel
+            text: folder_path
+            readOnly: true
+            color: theme.textColor
+            implicitWidth: 300
+            padding: 10
+            placeholderText: qsTr("Folder path...")
+            placeholderTextColor: theme.mutedTextColor
+            ToolTip.text: qsTr("Folder path to documents (Required)")
+            ToolTip.visible: hovered
+        }
+
+        MyButton {
+            text: qsTr("Browse")
+            onClicked: {
+                folderDialog.open();
+            }
+        }
+
+        MyButton {
+            text: qsTr("Add")
+            enabled: addCollectionDialog.collection !== "" && addCollectionDialog.folder_path !== ""
+            Accessible.role: Accessible.Button
+            Accessible.name: text
+            Accessible.description: qsTr("Add button")
+            DialogButtonBox.buttonRole: DialogButtonBox.AcceptRole
+            onClicked: addCollectionDialog.accept() // this button lives in a Row, not a DialogButtonBox, so the attached role alone never accepts the dialog
+        }
+    }
+
+    background: Rectangle {
+        anchors.fill: parent
+        color: theme.backgroundDarkest
+        border.width: 1
+        border.color: theme.dialogBorder
+        radius: 10
+    }
+}
\ No newline at end of file
diff --git a/gpt4all-chat/qml/LocalDocs.qml b/gpt4all-chat/qml/LocalDocs.qml
index eb42d8b2..d960b16a 100644
--- a/gpt4all-chat/qml/LocalDocs.qml
+++ b/gpt4all-chat/qml/LocalDocs.qml
@@ -2,35 +2,175 @@ import QtQuick
 import QtQuick.Controls
 import QtQuick.Controls.Basic
 import QtQuick.Layouts
+import localdocs
 
-GridLayout {
-    columns: 2
-    rowSpacing: 10
-    columnSpacing: 10
-
-    Label {
-        text: qsTr("Collections:")
-        color: theme.textColor
-        Layout.row: 1
-        Layout.column: 0
+Item {
+    AddCollectionDialog {
+        id: addCollectionDialog
     }
 
-    RowLayout {
-        spacing: 10
-        Layout.row: 1
-        Layout.column: 1
-        MyComboBox {
-            id: comboBox
-            Layout.minimumWidth: 350
+    Connections {
+        target: addCollectionDialog
+        function onAccepted() {
+            LocalDocs.addFolder(addCollectionDialog.collection, addCollectionDialog.folder_path)
         }
-        MyButton {
-            text: "Add"
+    }
+
+    GridLayout {
+        id: gridLayout
+        columns: 2
+        rowSpacing: 10
+        columnSpacing: 10
+
+        Label {
+            id: contextItemsPerPrompt
+            text: qsTr("Context items per prompt:")
+            color: theme.textColor
+            Layout.row: 0
+            Layout.column: 0
         }
-        MyButton {
-            text: "Remove"
+
+        MyTextField {
+            Layout.row: 0
+            Layout.column: 1
         }
-        MyButton {
-            text: "Rename"
+
+        Label {
+            id: chunkLabel
+            text: qsTr("Chunksize:")
+            color: theme.textColor
+            Layout.row: 1
+            Layout.column: 0
+        }
+
+        MyTextField {
+            id: chunkSizeTextField
+            Layout.row: 1
+            Layout.column: 1
+        }
+    }
+
+    ScrollView {
+        id: scrollView
+        anchors.top: gridLayout.bottom
+        anchors.topMargin: 20
+        anchors.bottom: newCollectionButton.top
+        anchors.bottomMargin: 10
+        anchors.left: parent.left
+        anchors.right: parent.right
+        clip: true
+        contentHeight: 300
+        ScrollBar.vertical.policy: ScrollBar.AlwaysOn
+
+        background: Rectangle {
+            color: theme.backgroundLighter
+        }
+
+        ListView {
+            id: listView
+            model: LocalDocs.localDocsModel
+            boundsBehavior: Flickable.StopAtBounds
+            headerPositioning: ListView.InlineHeader
+            header: Rectangle {
+                width: listView.width
+                height: collectionLabel.height + 40
+                color: theme.backgroundDark
+                Label {
+                    id: collectionLabel
+                    anchors.verticalCenter: parent.verticalCenter
+                    anchors.left: parent.left
+                    anchors.margins: 20
+                    text: "Collection"
+                    color: theme.textColor
+                    font.bold: true
+                    width: 200
+                }
+
+                Label {
+                    anchors.left: collectionLabel.right
+                    anchors.margins: 20
+                    anchors.verticalCenter: parent.verticalCenter
+                    text: "Folder"
+                    color: theme.textColor
+                    font.bold: true
+                }
+            }
+
+            delegate: Rectangle {
+                id: item
+                width: listView.width
+                height: buttons.height + 20
+                color: index % 2 === 0 ? theme.backgroundLight : theme.backgroundLighter
+                property bool removing: false
+
+                Text {
+                    id: collectionId
+                    anchors.verticalCenter: parent.verticalCenter
+                    anchors.left: parent.left
+                    anchors.margins: 20
+                    text: collection
+                    elide: Text.ElideRight
+                    color: theme.textColor
+                    width: 200
+                }
+
+                Text {
+                    id: folderId
+                    anchors.left: collectionId.right
+                    anchors.margins: 20
+                    anchors.verticalCenter: parent.verticalCenter
+                    text: folder_path
+                    elide: Text.ElideRight
+                    color: theme.textColor
+                }
+
+                Item {
+                    id: buttons
+                    anchors.right: parent.right
+                    anchors.verticalCenter: parent.verticalCenter
+                    anchors.margins: 20
+                    width: childrenRect.width
+                    height: Math.max(removeButton.height, busyIndicator.height)
+                    MyButton {
+                        id: removeButton
+                        anchors.verticalCenter: parent.verticalCenter
+                        text: qsTr("Remove")
+                        visible: !item.removing
+                        onClicked: {
+                            item.removing = true
+                            LocalDocs.removeFolder(collection, folder_path)
+                        }
+                    }
+                    BusyIndicator {
+                        id: busyIndicator
+                        anchors.verticalCenter: parent.verticalCenter
+                        visible: item.removing
+                    }
+                }
+            }
+        }
+    }
+
+    MyButton {
+        id: newCollectionButton
+        anchors.right: parent.right
+        anchors.bottom: parent.bottom
+        text: qsTr("New collection")
+        onClicked: {
+            addCollectionDialog.open();
+        }
+    }
+
+    MyButton {
+        id: restoreDefaultsButton
+        anchors.left: parent.left
+        anchors.bottom: parent.bottom
+        text: qsTr("Restore Defaults")
+        Accessible.role: Accessible.Button
+        Accessible.name: text
+        Accessible.description: qsTr("Restores the settings dialog to a default state")
+        onClicked: {
+            // settingsDialog.restoreGenerationDefaults()
         }
     }
 }
diff --git a/gpt4all-chat/qml/SettingsDialog.qml b/gpt4all-chat/qml/SettingsDialog.qml
index a4e59789..e6d06f72 100644
--- a/gpt4all-chat/qml/SettingsDialog.qml
+++ b/gpt4all-chat/qml/SettingsDialog.qml
@@ -234,21 +234,28 @@ Dialog {
                         anchors.top: parent.top
                         anchors.left: parent.left
                         anchors.right: parent.right
-                        height: 1 ? localDocsButton.checked : 0
+                        height: localDocsButton.checked ? 1 : 0 // 1px top border only while the tab is checked
+                        color: theme.tabBorder
+                    }
+                    Rectangle {
+                        anchors.bottom: parent.bottom
+                        anchors.left: parent.left
+                        anchors.right: parent.right
+                        height: localDocsButton.checked ? 0 : 1 // 1px bottom border only while the tab is unchecked
                         color: theme.tabBorder
                     }
                     Rectangle {
                         anchors.top: parent.top
                         anchors.bottom: parent.bottom
                         anchors.left: parent.left
-                        width: 1 ? localDocsButton.checked : 0
+                        width: localDocsButton.checked ? 1 : 0
                         color: theme.tabBorder
                     }
                     Rectangle {
                         anchors.top: parent.top
                         anchors.bottom: parent.bottom
                         anchors.right: parent.right
-                        width: 1 ? localDocsButton.checked : 0
+                        width: localDocsButton.checked ? 1 : 0
                         color: theme.tabBorder
                     }
                 }