mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-08-08 11:27:14 +00:00
Localdocs fixes (#3083)
Signed-off-by: Adam Treat <treat.adam@gmail.com>
This commit is contained in:
parent
1789a3c6d7
commit
f8dde82fda
@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
|
|||||||
|
|
||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- Limit bm25 retrieval to only specified collections ([#3083](https://github.com/nomic-ai/gpt4all/pull/3083))
|
||||||
|
- Fix bug removing documents because of a wrong case sensitive file suffix check ([#3083](https://github.com/nomic-ai/gpt4all/pull/3083))
|
||||||
|
- Fix bug with hybrid localdocs search where database would get out of sync ([#3083](https://github.com/nomic-ai/gpt4all/pull/3083))
|
||||||
|
- Fix GUI bug where the localdocs embedding device appears blank ([#3083](https://github.com/nomic-ai/gpt4all/pull/3083))
|
||||||
|
|
||||||
## [3.4.1] - 2024-10-11
|
## [3.4.1] - 2024-10-11
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
@ -155,6 +163,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||||||
- Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
|
- Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
|
||||||
- Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
|
- Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
|
||||||
|
|
||||||
|
[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.4.1...HEAD
|
||||||
[3.4.1]: https://github.com/nomic-ai/gpt4all/compare/v3.4.0...v3.4.1
|
[3.4.1]: https://github.com/nomic-ai/gpt4all/compare/v3.4.0...v3.4.1
|
||||||
[3.4.0]: https://github.com/nomic-ai/gpt4all/compare/v3.3.0...v3.4.0
|
[3.4.0]: https://github.com/nomic-ai/gpt4all/compare/v3.3.0...v3.4.0
|
||||||
[3.3.1]: https://github.com/nomic-ai/gpt4all/compare/v3.3.0...v3.3.1
|
[3.3.1]: https://github.com/nomic-ai/gpt4all/compare/v3.3.0...v3.3.1
|
||||||
|
@ -4,9 +4,9 @@ include(../common/common.cmake)
|
|||||||
|
|
||||||
set(APP_VERSION_MAJOR 3)
|
set(APP_VERSION_MAJOR 3)
|
||||||
set(APP_VERSION_MINOR 4)
|
set(APP_VERSION_MINOR 4)
|
||||||
set(APP_VERSION_PATCH 1)
|
set(APP_VERSION_PATCH 2)
|
||||||
set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
|
set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
|
||||||
set(APP_VERSION "${APP_VERSION_BASE}")
|
set(APP_VERSION "${APP_VERSION_BASE}-dev0")
|
||||||
|
|
||||||
project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
|
project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
|
||||||
|
|
||||||
|
@ -176,6 +176,7 @@ MySettingsTab {
|
|||||||
ListElement { text: qsTr("Application default") }
|
ListElement { text: qsTr("Application default") }
|
||||||
Component.onCompleted: {
|
Component.onCompleted: {
|
||||||
MySettings.embeddingsDeviceList.forEach(d => append({"text": d}));
|
MySettings.embeddingsDeviceList.forEach(d => append({"text": d}));
|
||||||
|
deviceBox.updateModel();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Accessible.name: deviceLabel.text
|
Accessible.name: deviceLabel.text
|
||||||
|
@ -233,12 +233,17 @@ static const QString SELECT_COUNT_CHUNKS_SQL = uR"(
|
|||||||
)"_s;
|
)"_s;
|
||||||
|
|
||||||
static const QString SELECT_CHUNKS_FTS_SQL = uR"(
|
static const QString SELECT_CHUNKS_FTS_SQL = uR"(
|
||||||
select id, bm25(chunks_fts) as score
|
select fts.id, bm25(chunks_fts) as score
|
||||||
from chunks_fts
|
from chunks_fts fts
|
||||||
|
join documents d on fts.document_id = d.id
|
||||||
|
join collection_items ci on d.folder_id = ci.folder_id
|
||||||
|
join collections co on ci.collection_id = co.id
|
||||||
where chunks_fts match ?
|
where chunks_fts match ?
|
||||||
order by score limit %1;
|
and co.name in ('%1')
|
||||||
|
order by score limit %2;
|
||||||
)"_s;
|
)"_s;
|
||||||
|
|
||||||
|
|
||||||
#define NAMED_PAIR(name, typea, a, typeb, b) \
|
#define NAMED_PAIR(name, typea, a, typeb, b) \
|
||||||
struct name { typea a; typeb b; }; \
|
struct name { typea a; typeb b; }; \
|
||||||
static bool operator==(const name &x, const name &y) { return x.a == y.a && x.b == y.b; } \
|
static bool operator==(const name &x, const name &y) { return x.a == y.a && x.b == y.b; } \
|
||||||
@ -349,6 +354,14 @@ static const QString UPDATE_LAST_UPDATE_TIME_SQL = uR"(
|
|||||||
update collections set last_update_time = ? where id = ?;
|
update collections set last_update_time = ? where id = ?;
|
||||||
)"_s;
|
)"_s;
|
||||||
|
|
||||||
|
// Issues the FTS5 'integrity-check' special command against the chunks_fts table.
// NOTE(review): per the SQLite FTS5 documentation a non-zero rank value also compares the
// index against the content table -- confirm this matches how chunks_fts is declared.
static const QString FTS_INTEGRITY_SQL = uR"(
|
||||||
|
insert into chunks_fts(chunks_fts, rank) values('integrity-check', 1);
|
||||||
|
)"_s;
|
||||||
|
|
||||||
|
// Issues the FTS5 'rebuild' special command against the chunks_fts table; executed by
// Database::ftsIntegrityCheck() when the integrity check reports a corrupt index.
static const QString FTS_REBUILD_SQL = uR"(
|
||||||
|
insert into chunks_fts(chunks_fts) values('rebuild');
|
||||||
|
)"_s;
|
||||||
|
|
||||||
static bool addCollection(QSqlQuery &q, const QString &collection_name, const QDateTime &start_update,
|
static bool addCollection(QSqlQuery &q, const QString &collection_name, const QDateTime &start_update,
|
||||||
const QDateTime &last_update, const QString &embedding_model, CollectionItem &item)
|
const QDateTime &last_update, const QString &embedding_model, CollectionItem &item)
|
||||||
{
|
{
|
||||||
@ -1815,6 +1828,7 @@ void Database::start()
|
|||||||
m_databaseValid = false;
|
m_databaseValid = false;
|
||||||
} else {
|
} else {
|
||||||
cleanDB();
|
cleanDB();
|
||||||
|
ftsIntegrityCheck();
|
||||||
QSqlQuery q(m_db);
|
QSqlQuery q(m_db);
|
||||||
if (!refreshDocumentIdCache(q)) {
|
if (!refreshDocumentIdCache(q)) {
|
||||||
m_databaseValid = false;
|
m_databaseValid = false;
|
||||||
@ -2328,7 +2342,7 @@ QList<int> Database::searchBM25(const QString &query, const QList<QString> &coll
|
|||||||
QList<BM25Query> bm25Queries = queriesForFTS5(query);
|
QList<BM25Query> bm25Queries = queriesForFTS5(query);
|
||||||
|
|
||||||
QSqlQuery sqlQuery(m_db);
|
QSqlQuery sqlQuery(m_db);
|
||||||
sqlQuery.prepare(SELECT_CHUNKS_FTS_SQL.arg(k));
|
sqlQuery.prepare(SELECT_CHUNKS_FTS_SQL.arg(collections.join("', '"), QString::number(k)));
|
||||||
|
|
||||||
QList<SearchResult> results;
|
QList<SearchResult> results;
|
||||||
for (auto &bm25Query : std::as_const(bm25Queries)) {
|
for (auto &bm25Query : std::as_const(bm25Queries)) {
|
||||||
@ -2346,11 +2360,13 @@ QList<int> Database::searchBM25(const QString &query, const QList<QString> &coll
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
if (sqlQuery.at() != QSql::AfterLastRow) {
|
||||||
const int chunkId = sqlQuery.value(0).toInt();
|
do {
|
||||||
const float score = sqlQuery.value(1).toFloat();
|
const int chunkId = sqlQuery.value(0).toInt();
|
||||||
results.append({chunkId, score});
|
const float score = sqlQuery.value(1).toFloat();
|
||||||
} while (sqlQuery.next());
|
results.append({chunkId, score});
|
||||||
|
} while (sqlQuery.next());
|
||||||
|
}
|
||||||
|
|
||||||
k = qMin(k, results.size());
|
k = qMin(k, results.size());
|
||||||
std::partial_sort(
|
std::partial_sort(
|
||||||
@ -2524,6 +2540,26 @@ void Database::retrieveFromDB(const QList<QString> &collections, const QString &
|
|||||||
results->append(tempResults.value(id));
|
results->append(tempResults.value(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the FTS5 integrity check on chunks_fts and rebuilds the index if it is reported corrupt.
//
// Returns true when the check passes, or when it fails with native error code "267" and the
// subsequent rebuild succeeds. Returns false (after logging a warning) when the check fails
// with any other error, or when the rebuild itself fails.
bool Database::ftsIntegrityCheck()
|
||||||
|
{
|
||||||
|
QSqlQuery q(m_db);
|
||||||
|
|
||||||
|
// Executing this statement fails with an SQL error if the integrity check fails
|
||||||
|
// See: https://www.sqlite.org/fts5.html#the_integrity_check_command
|
||||||
|
const bool success = q.exec(FTS_INTEGRITY_SQL);
|
||||||
|
// Any failure other than code 267 is treated as unrecoverable here; 267 falls through to the rebuild below
if (!success && q.lastError().nativeErrorCode() != "267" /*SQLITE_CORRUPT_VTAB from sqlite header*/) {
|
||||||
|
qWarning() << "ERROR: Cannot prepare sql for fts integrity check" << q.lastError();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only reached with !success when the error code was 267: try to rebuild the fts index
if (!success && !q.exec(FTS_REBUILD_SQL)) {
|
||||||
|
qWarning() << "ERROR: Cannot exec sql for fts rebuild" << q.lastError();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// FIXME This is very slow and non-interruptible and when we close the application and we're
|
// FIXME This is very slow and non-interruptible and when we close the application and we're
|
||||||
// cleaning a large table this can cause the app to take forever to shut down. This would ideally be
|
// cleaning a large table this can cause the app to take forever to shut down. This would ideally be
|
||||||
// interruptible and we'd continue 'cleaning' when we restart
|
// interruptible and we'd continue 'cleaning' when we restart
|
||||||
@ -2574,7 +2610,7 @@ bool Database::cleanDB()
|
|||||||
int document_id = q.value(0).toInt();
|
int document_id = q.value(0).toInt();
|
||||||
QString document_path = q.value(1).toString();
|
QString document_path = q.value(1).toString();
|
||||||
QFileInfo info(document_path);
|
QFileInfo info(document_path);
|
||||||
if (info.exists() && info.isReadable() && m_scannedFileExtensions.contains(info.suffix()))
|
if (info.exists() && info.isReadable() && m_scannedFileExtensions.contains(info.suffix(), Qt::CaseInsensitive))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
|
@ -41,10 +41,20 @@ class QTimer;
|
|||||||
|
|
||||||
/* Version 0: GPT4All v2.4.3, full-text search
|
/* Version 0: GPT4All v2.4.3, full-text search
|
||||||
* Version 1: GPT4All v2.5.3, embeddings in hnswlib
|
* Version 1: GPT4All v2.5.3, embeddings in hnswlib
|
||||||
* Version 2: GPT4All v3.0.0, embeddings in sqlite */
|
* Version 2: GPT4All v3.0.0, embeddings in sqlite
|
||||||
|
* Version 3: GPT4All v3.4.0, hybrid search
|
||||||
|
*/
|
||||||
|
|
||||||
// minimum supported version
|
// minimum supported version
|
||||||
static const int LOCALDOCS_MIN_VER = 1;
|
static const int LOCALDOCS_MIN_VER = 1;
|
||||||
|
|
||||||
|
// FIXME: (Adam) The next time we bump the version we should add triggers to manage the fts external
|
||||||
|
// content table as recommended in the official documentation to keep the fts index in sync
|
||||||
|
// See: https://www.sqlite.org/fts5.html#external_content_tables
|
||||||
|
|
||||||
|
// FIXME: (Adam) The fts virtual table should include the chunk_id explicitly instead of relying upon
|
||||||
|
// the id of the two tables to be in sync
|
||||||
|
|
||||||
// current version
|
// current version
|
||||||
static const int LOCALDOCS_VERSION = 3;
|
static const int LOCALDOCS_VERSION = 3;
|
||||||
|
|
||||||
@ -252,6 +262,7 @@ private:
|
|||||||
void enqueueDocumentInternal(DocumentInfo &&info, bool prepend = false);
|
void enqueueDocumentInternal(DocumentInfo &&info, bool prepend = false);
|
||||||
void enqueueDocuments(int folder_id, std::list<DocumentInfo> &&infos);
|
void enqueueDocuments(int folder_id, std::list<DocumentInfo> &&infos);
|
||||||
void scanQueue();
|
void scanQueue();
|
||||||
|
bool ftsIntegrityCheck();
|
||||||
bool cleanDB();
|
bool cleanDB();
|
||||||
void addFolderToWatch(const QString &path);
|
void addFolderToWatch(const QString &path);
|
||||||
void removeFolderFromWatch(const QString &path);
|
void removeFolderFromWatch(const QString &path);
|
||||||
|
Loading…
Reference in New Issue
Block a user