mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-06-20 20:53:23 +00:00
Fixup the rescan. Fix debug output.
This commit is contained in:
parent
7e42af5f33
commit
d88eddbc20
@ -249,7 +249,7 @@ const auto SELECT_DOCUMENT_SQL = QLatin1String(R"(
|
|||||||
select id, document_time from documents where document_path = ?;
|
select id, document_time from documents where document_path = ?;
|
||||||
)");
|
)");
|
||||||
|
|
||||||
bool addDocument(QSqlQuery &q, int folder_id, int document_time, const QString &document_path, int *document_id)
|
bool addDocument(QSqlQuery &q, int folder_id, qint64 document_time, const QString &document_path, int *document_id)
|
||||||
{
|
{
|
||||||
if (!q.prepare(INSERT_DOCUMENTS_SQL))
|
if (!q.prepare(INSERT_DOCUMENTS_SQL))
|
||||||
return false;
|
return false;
|
||||||
@ -262,7 +262,7 @@ bool addDocument(QSqlQuery &q, int folder_id, int document_time, const QString &
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool updateDocument(QSqlQuery &q, int id, int document_time)
|
bool updateDocument(QSqlQuery &q, int id, qint64 document_time)
|
||||||
{
|
{
|
||||||
if (!q.prepare(UPDATE_DOCUMENT_TIME_SQL))
|
if (!q.prepare(UPDATE_DOCUMENT_TIME_SQL))
|
||||||
return false;
|
return false;
|
||||||
@ -271,7 +271,7 @@ bool updateDocument(QSqlQuery &q, int id, int document_time)
|
|||||||
return q.exec();
|
return q.exec();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool selectDocument(QSqlQuery &q, const QString &document_path, int *id, int *document_time) {
|
bool selectDocument(QSqlQuery &q, const QString &document_path, int *id, qint64 *document_time) {
|
||||||
if (!q.prepare(SELECT_DOCUMENT_SQL))
|
if (!q.prepare(SELECT_DOCUMENT_SQL))
|
||||||
return false;
|
return false;
|
||||||
q.addBindValue(document_path);
|
q.addBindValue(document_path);
|
||||||
@ -280,7 +280,7 @@ bool selectDocument(QSqlQuery &q, const QString &document_path, int *id, int *do
|
|||||||
Q_ASSERT(q.size() < 2);
|
Q_ASSERT(q.size() < 2);
|
||||||
if (q.next()) {
|
if (q.next()) {
|
||||||
*id = q.value(0).toInt();
|
*id = q.value(0).toInt();
|
||||||
*document_time = q.value(1).toInt();
|
*document_time = q.value(1).toLongLong();
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -407,28 +407,6 @@ void Database::chunkStream(QTextStream &stream, int document_id)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//void Database::chunkStream(QTextStream &stream, int document_id)
|
|
||||||
//{
|
|
||||||
// QString line;
|
|
||||||
// int chunk_id = 0;
|
|
||||||
// while (stream.readLineInto(&line)) {
|
|
||||||
// int chunkSize = 256;
|
|
||||||
// for (int i = 0; i < line.length(); i += chunkSize) {
|
|
||||||
// QString chunk = line.mid(i, chunkSize);
|
|
||||||
// QSqlQuery q;
|
|
||||||
// if (!addChunk(q,
|
|
||||||
// document_id,
|
|
||||||
// ++chunk_id,
|
|
||||||
// chunk,
|
|
||||||
// 0 /*embedding_id*/,
|
|
||||||
// QString() /*embedding_path*/
|
|
||||||
// )) {
|
|
||||||
// qWarning() << "ERROR: Could not insert chunk into db" << q.lastError();
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
|
|
||||||
void Database::scanQueue()
|
void Database::scanQueue()
|
||||||
{
|
{
|
||||||
if (m_docsToScan.isEmpty())
|
if (m_docsToScan.isEmpty())
|
||||||
@ -440,13 +418,13 @@ void Database::scanQueue()
|
|||||||
const QString document_path = info.doc.canonicalFilePath();
|
const QString document_path = info.doc.canonicalFilePath();
|
||||||
|
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
qDebug() << "scanDocument" << folder_id << document_time << document_path;
|
qDebug() << "scanning document" << document_path;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Check and see if we already have this document
|
// Check and see if we already have this document
|
||||||
QSqlQuery q;
|
QSqlQuery q;
|
||||||
int existing_id = -1;
|
int existing_id = -1;
|
||||||
int existing_time = -1;
|
qint64 existing_time = -1;
|
||||||
if (!selectDocument(q, document_path, &existing_id, &existing_time)) {
|
if (!selectDocument(q, document_path, &existing_id, &existing_time)) {
|
||||||
return handleDocumentErrorAndScheduleNext("ERROR: Cannot select document",
|
return handleDocumentErrorAndScheduleNext("ERROR: Cannot select document",
|
||||||
existing_id, document_path, q.lastError());
|
existing_id, document_path, q.lastError());
|
||||||
@ -457,8 +435,9 @@ void Database::scanQueue()
|
|||||||
if (existing_id != -1) {
|
if (existing_id != -1) {
|
||||||
Q_ASSERT(existing_time != -1);
|
Q_ASSERT(existing_time != -1);
|
||||||
if (document_time == existing_time) {
|
if (document_time == existing_time) {
|
||||||
return handleDocumentErrorAndScheduleNext("WARNING: No need to rescan",
|
// No need to rescan, but we do have to schedule next
|
||||||
existing_id, document_path, q.lastError());
|
if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue);
|
||||||
|
return;
|
||||||
} else {
|
} else {
|
||||||
if (!deleteChunksByDocumentId(q, existing_id)) {
|
if (!deleteChunksByDocumentId(q, existing_id)) {
|
||||||
return handleDocumentErrorAndScheduleNext("ERROR: Cannot delete chunks of document",
|
return handleDocumentErrorAndScheduleNext("ERROR: Cannot delete chunks of document",
|
||||||
@ -467,12 +446,6 @@ void Database::scanQueue()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
QFile file(document_path);
|
|
||||||
if (!file.open( QIODevice::ReadOnly)) {
|
|
||||||
return handleDocumentErrorAndScheduleNext("ERROR: Cannot open file for scanning",
|
|
||||||
existing_id, document_path, q.lastError());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update the document_time for an existing document, or add it for the first time now
|
// Update the document_time for an existing document, or add it for the first time now
|
||||||
int document_id = existing_id;
|
int document_id = existing_id;
|
||||||
if (document_id != -1) {
|
if (document_id != -1) {
|
||||||
@ -493,7 +466,6 @@ void Database::scanQueue()
|
|||||||
QSqlDatabase::database().transaction();
|
QSqlDatabase::database().transaction();
|
||||||
Q_ASSERT(document_id != -1);
|
Q_ASSERT(document_id != -1);
|
||||||
if (info.doc.suffix() == QLatin1String("pdf")) {
|
if (info.doc.suffix() == QLatin1String("pdf")) {
|
||||||
file.close();
|
|
||||||
QPdfDocument doc;
|
QPdfDocument doc;
|
||||||
if (QPdfDocument::Error::None != doc.load(info.doc.canonicalFilePath())) {
|
if (QPdfDocument::Error::None != doc.load(info.doc.canonicalFilePath())) {
|
||||||
return handleDocumentErrorAndScheduleNext("ERROR: Could not load pdf",
|
return handleDocumentErrorAndScheduleNext("ERROR: Could not load pdf",
|
||||||
@ -508,6 +480,11 @@ void Database::scanQueue()
|
|||||||
QTextStream stream(&text);
|
QTextStream stream(&text);
|
||||||
chunkStream(stream, document_id);
|
chunkStream(stream, document_id);
|
||||||
} else {
|
} else {
|
||||||
|
QFile file(document_path);
|
||||||
|
if (!file.open( QIODevice::ReadOnly)) {
|
||||||
|
return handleDocumentErrorAndScheduleNext("ERROR: Cannot open file for scanning",
|
||||||
|
existing_id, document_path, q.lastError());
|
||||||
|
}
|
||||||
QTextStream stream(&file);
|
QTextStream stream(&file);
|
||||||
chunkStream(stream, document_id);
|
chunkStream(stream, document_id);
|
||||||
file.close();
|
file.close();
|
||||||
@ -515,7 +492,7 @@ void Database::scanQueue()
|
|||||||
QSqlDatabase::database().commit();
|
QSqlDatabase::database().commit();
|
||||||
|
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
qDebug() << "localdocs chunking" << document_path << "took" << timer.elapsed() << "milliseconds";
|
qDebug() << "chunking" << document_path << "took" << timer.elapsed() << "ms";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue);
|
if (!m_docsToScan.isEmpty()) QTimer::singleShot(0, this, &Database::scanQueue);
|
||||||
@ -524,7 +501,7 @@ void Database::scanQueue()
|
|||||||
void Database::scanDocuments(int folder_id, const QString &folder_path)
|
void Database::scanDocuments(int folder_id, const QString &folder_path)
|
||||||
{
|
{
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
qDebug() << "scanDocuments" << folder_id << folder_path;
|
qDebug() << "scanning folder for documents" << folder_path;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
QDir dir(folder_path);
|
QDir dir(folder_path);
|
||||||
@ -556,7 +533,6 @@ void Database::scanDocuments(int folder_id, const QString &folder_path)
|
|||||||
void Database::start()
|
void Database::start()
|
||||||
{
|
{
|
||||||
connect(m_watcher, &QFileSystemWatcher::directoryChanged, this, &Database::directoryChanged);
|
connect(m_watcher, &QFileSystemWatcher::directoryChanged, this, &Database::directoryChanged);
|
||||||
connect(m_watcher, &QFileSystemWatcher::fileChanged, this, &Database::fileChanged);
|
|
||||||
connect(this, &Database::docsToScanChanged, this, &Database::scanQueue);
|
connect(this, &Database::docsToScanChanged, this, &Database::scanQueue);
|
||||||
if (!QSqlDatabase::drivers().contains("QSQLITE")) {
|
if (!QSqlDatabase::drivers().contains("QSQLITE")) {
|
||||||
qWarning() << "ERROR: missing sqllite driver";
|
qWarning() << "ERROR: missing sqllite driver";
|
||||||
@ -684,17 +660,25 @@ void Database::directoryChanged(const QString &path)
|
|||||||
qDebug() << "directoryChanged" << path;
|
qDebug() << "directoryChanged" << path;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// FIXME: Get the folder id
|
QSqlQuery q;
|
||||||
// Re-scan documents associated with folder_id
|
int folder_id = -1;
|
||||||
}
|
|
||||||
|
|
||||||
void Database::fileChanged(const QString &path)
|
// Lookup the folder_id in the db
|
||||||
{
|
if (!selectFolder(q, path, &folder_id)) {
|
||||||
#if defined(DEBUG)
|
qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError();
|
||||||
qDebug() << "fileChanged" << path;
|
return;
|
||||||
#endif
|
}
|
||||||
// FIXME: Get the folder id
|
|
||||||
// Re-scan documents associated with document_path
|
// If we don't have a folder_id in the db, then something bad has happened
|
||||||
|
Q_ASSERT(folder_id != -1);
|
||||||
|
if (folder_id == -1) {
|
||||||
|
qWarning() << "ERROR: Watched folder does not exist in db" << path;
|
||||||
|
m_watcher->removePath(path);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rescan the documents associated with the folder
|
||||||
|
scanDocuments(folder_id, path);
|
||||||
}
|
}
|
||||||
|
|
||||||
class MyLocalDocs: public LocalDocs { };
|
class MyLocalDocs: public LocalDocs { };
|
||||||
@ -741,9 +725,6 @@ void LocalDocs::requestRetrieve(const QList<QString> &collections, const QString
|
|||||||
|
|
||||||
void LocalDocs::retrieveResult(const QList<QString> &result)
|
void LocalDocs::retrieveResult(const QList<QString> &result)
|
||||||
{
|
{
|
||||||
#if defined(DEBUG)
|
|
||||||
qDebug() << "local docs retrieve" << result;
|
|
||||||
#endif
|
|
||||||
m_retrieveInProgress = false;
|
m_retrieveInProgress = false;
|
||||||
m_retrieveResult = result;
|
m_retrieveResult = result;
|
||||||
emit receivedResult();
|
emit receivedResult();
|
||||||
|
@ -34,7 +34,6 @@ Q_SIGNALS:
|
|||||||
private Q_SLOTS:
|
private Q_SLOTS:
|
||||||
void start();
|
void start();
|
||||||
void directoryChanged(const QString &path);
|
void directoryChanged(const QString &path);
|
||||||
void fileChanged(const QString &path);
|
|
||||||
bool addFolderToWatch(const QString &path);
|
bool addFolderToWatch(const QString &path);
|
||||||
bool removeFolderFromWatch(const QString &path);
|
bool removeFolderFromWatch(const QString &path);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user