Non-automatic localdocs.

Signed-off-by: Adam Treat <treat.adam@gmail.com>
This commit is contained in:
Adam Treat 2024-10-14 08:29:10 -04:00
parent 1789a3c6d7
commit bfcabe97f0
9 changed files with 126 additions and 16 deletions

View File

@ -29,6 +29,37 @@ MySettingsTab {
text: qsTr("LocalDocs Settings")
}
// Section header for the "Behavior" group: a bold title followed by a thin divider line.
ColumnLayout {
spacing: 10
// Section title
Label {
color: theme.styledTextColor
font.pixelSize: theme.fontSizeLarge
font.bold: true
text: qsTr("Behavior")
}
// 1px horizontal rule that stretches across the available width
Rectangle {
Layout.fillWidth: true
height: 1
color: theme.settingsDivider
}
}
// Settings row for "Automatic Update": a label with help text next to a checkbox
// that mirrors MySettings.localDocsAutomaticUpdate.
RowLayout {
MySettingsLabel {
id: automaticUpdateLabel
text: qsTr("Automatic Update")
helpText: qsTr("Whenever a file or folder changes it should automatically be re-indexed/embedded.")
}
MyCheckBox {
id: automaticUpdateBox
// Bound to the persisted setting so the box reflects the stored value
checked: MySettings.localDocsAutomaticUpdate
// Toggle the persisted setting on every click
// NOTE(review): the Database appears to receive this value only through its
// constructor - confirm whether a change here takes effect without a restart.
onClicked: {
MySettings.localDocsAutomaticUpdate = !MySettings.localDocsAutomaticUpdate
}
}
}
ColumnLayout {
spacing: 10
Label {

View File

@ -210,6 +210,9 @@ Rectangle {
if (model.currentEmbeddingsToIndex !== 0)
return theme.altProgressBackground
if (model.outOfDate)
return theme.altProgressBackground
if (model.forceIndexing)
return theme.red200
@ -247,6 +250,9 @@ Rectangle {
if (model.currentEmbeddingsToIndex !== 0)
return theme.altProgressText
if (model.outOfDate)
return theme.altProgressText
if (model.forceIndexing)
return theme.textErrorColor
@ -267,6 +273,9 @@ Rectangle {
if (model.forceIndexing)
return qsTr("REQUIRES UPDATE")
if (model.outOfDate)
return qsTr("OUT OF DATE")
if (model.installed)
return qsTr("READY")
@ -305,8 +314,11 @@ Rectangle {
if (model.forceIndexing)
return qsTr("This collection requires an update after version change")
if (model.outOfDate)
return qsTr("This collection is out of date and needs to be rebuilt")
if (model.installed)
return qsTr("Automatically reindexes upon changes to the folder")
return qsTr("This collection is up to date")
return qsTr("Installation in progress")
}

View File

@ -1014,8 +1014,9 @@ bool Database::initDb(const QString &modelPath, const QList<CollectionItem> &old
return true;
}
Database::Database(int chunkSize, QStringList extensions)
Database::Database(bool automaticUpdate, int chunkSize, QStringList extensions)
: QObject(nullptr)
, m_automaticUpdate(automaticUpdate)
, m_chunkSize(chunkSize)
, m_scannedFileExtensions(std::move(extensions))
, m_scanIntervalTimer(new QTimer(this))
@ -1100,6 +1101,7 @@ void Database::updateFolderToIndex(int folder_id, size_t countForFolder, bool se
sendChunkList(); // send any remaining embedding chunks to llm
item.indexing = false;
item.installed = true;
item.outOfDate = false;
// Set the last update if we are done
if (item.startUpdate > item.lastUpdate && item.currentEmbeddingsToIndex == 0)
@ -1599,6 +1601,41 @@ void Database::enqueueDocumentInternal(DocumentInfo &&info, bool prepend)
queue.insert(prepend ? queue.begin() : queue.end(), std::move(info));
}
/// Reports whether any scanned document differs from what the database has recorded.
///
/// The folder is treated as out of date as soon as one document in `infos`
///  - no longer exists or is not readable,
///  - is not found by selectDocument (the looked-up id stays at -1), or
///  - has a modification time different from the stored one.
///
/// This function does not enqueue anything for indexing itself; it only refreshes each
/// entry's cached file metadata and queries the database.
///
/// @param folder_id  id of the folder the documents belong to (currently not consulted).
/// @param infos      documents found while scanning the folder; not moved from or emptied here.
/// @return true if at least one document is missing, unknown, or modified; false otherwise.
bool Database::isOutOfDate(int folder_id, std::list<DocumentInfo> &&infos) const
{
    for (auto &info : infos) {
        // Refresh the cached file metadata so the checks below see the current on-disk state
        info.file.stat();

        // If the doc has since been deleted or is no longer readable, the folder is out of date
        if (!info.file.exists() || !info.file.isReadable())
            return true;

        const qint64 document_time = info.file.fileTime(QFile::FileModificationTime).toMSecsSinceEpoch();
        const QString document_path = info.file.canonicalFilePath();

        // Check and see if we already have this document
        QSqlQuery q(m_db);
        int existing_id = -1;
        qint64 existing_time = -1;
        if (!selectDocument(q, document_path, &existing_id, &existing_time)) {
            // The lookup itself failed: report it and keep checking the remaining documents
            handleDocumentError("ERROR: Cannot select document", existing_id, document_path, q.lastError());
            continue;
        }

        // If not, then we are out of date
        if (existing_id == -1)
            return true;

        // If we have the document and the time does not match, then we are out of date
        if (document_time != existing_time)
            return true;
    }

    // Every document that could be checked matches the database
    return false;
}
void Database::enqueueDocuments(int folder_id, std::list<DocumentInfo> &&infos)
{
// enqueue all documents
@ -1766,7 +1803,7 @@ dequeue:
return updateFolderToIndex(folder_id, countForFolder);
}
void Database::scanDocuments(int folder_id, const QString &folder_path)
void Database::scanDocuments(int folder_id, const QString &folder_path, bool forceIndexing)
{
#if defined(DEBUG)
qDebug() << "scanning folder for documents" << folder_path;
@ -1791,9 +1828,14 @@ void Database::scanDocuments(int folder_id, const QString &folder_path)
if (!infos.empty()) {
CollectionItem item = guiCollectionItem(folder_id);
item.indexing = true;
const bool shouldIndex = m_automaticUpdate || forceIndexing;
if (shouldIndex)
item.indexing = true;
else
item.outOfDate = isOutOfDate(folder_id, std::move(infos));
updateGuiForCollectionItem(item);
enqueueDocuments(folder_id, std::move(infos));
if (shouldIndex)
enqueueDocuments(folder_id, std::move(infos));
} else {
updateFolderToIndex(folder_id, 0, false);
}
@ -1847,7 +1889,7 @@ void Database::addCurrentFolders()
for (const auto &i : collections) {
if (!i.forceIndexing) {
addFolderToWatch(i.folder_path);
scanDocuments(i.folder_id, i.folder_path);
scanDocuments(i.folder_id, i.folder_path, false /*forceIndexing*/);
}
}
@ -1982,7 +2024,7 @@ void Database::forceIndexing(const QString &collection, const QString &embedding
item.forceIndexing = false;
updateGuiForCollectionItem(item);
addFolderToWatch(folder.second);
scanDocuments(folder.first, folder.second);
scanDocuments(folder.first, folder.second, true /*forceIndexing*/);
}
}
@ -2014,7 +2056,7 @@ void Database::forceRebuildFolder(const QString &path)
item.currentEmbeddingsToIndex = item.totalEmbeddingsToIndex = 0;
updateGuiForCollectionItem(item);
scanDocuments(folder_id, path);
scanDocuments(folder_id, path, true /*forceIndexing*/);
}
bool Database::addFolder(const QString &collection, const QString &path, const QString &embedding_model)
@ -2058,7 +2100,7 @@ bool Database::addFolder(const QString &collection, const QString &path, const Q
// note: this is the existing embedding model if the collection was found
if (!item->embeddingModel.isNull()) {
addFolderToWatch(path);
scanDocuments(folder_id, path);
scanDocuments(folder_id, path, true /*forceIndexing*/);
}
}
return true;
@ -2665,7 +2707,7 @@ void Database::changeFileExtensions(const QStringList &extensions)
for (const auto &i: std::as_const(collections)) {
if (!i.forceIndexing)
scanDocuments(i.folder_id, i.folder_path);
scanDocuments(i.folder_id, i.folder_path, false /*forceIndexing*/);
}
}
@ -2702,6 +2744,13 @@ void Database::directoryChanged(const QString &path)
updateCollectionStatistics();
// Rescan the documents associated with the folder
if (folder_id != -1)
scanDocuments(folder_id, path);
if (folder_id != -1) {
if (m_automaticUpdate) {
scanDocuments(folder_id, path, false /*forceIndexing*/);
} else {
CollectionItem item = guiCollectionItem(folder_id);
item.outOfDate = true;
updateGuiForCollectionItem(item);
}
}
}

View File

@ -134,6 +134,7 @@ struct CollectionItem {
bool installed = false;
bool indexing = false;
bool forceIndexing = false;
bool outOfDate = false;
QString error;
// progress
@ -189,7 +190,7 @@ class Database : public QObject
{
Q_OBJECT
public:
Database(int chunkSize, QStringList extensions);
Database(bool automaticUpdate, int chunkSize, QStringList extensions);
~Database() override;
bool isValid() const { return m_databaseValid; }
@ -198,7 +199,7 @@ public Q_SLOTS:
void start();
bool scanQueueInterrupted() const;
void scanQueueBatch();
void scanDocuments(int folder_id, const QString &folder_path);
void scanDocuments(int folder_id, const QString &folder_path, bool forceIndexing);
void forceIndexing(const QString &collection, const QString &embedding_model);
void forceRebuildFolder(const QString &path);
bool addFolder(const QString &collection, const QString &path, const QString &embedding_model);
@ -250,6 +251,7 @@ private:
DocumentInfo dequeueDocument();
void removeFolderFromDocumentQueue(int folder_id);
void enqueueDocumentInternal(DocumentInfo &&info, bool prepend = false);
bool isOutOfDate(int folder_id, std::list<DocumentInfo> &&infos) const;
void enqueueDocuments(int folder_id, std::list<DocumentInfo> &&infos);
void scanQueue();
bool cleanDB();
@ -287,6 +289,7 @@ private:
private:
QSqlDatabase m_db;
bool m_automaticUpdate;
int m_chunkSize;
QStringList m_scannedFileExtensions;
QTimer *m_scanIntervalTimer;

View File

@ -26,7 +26,8 @@ LocalDocs::LocalDocs()
connect(MySettings::globalInstance(), &MySettings::localDocsFileExtensionsChanged, this, &LocalDocs::handleFileExtensionsChanged);
// Create the DB with the chunk size from settings
m_database = new Database(MySettings::globalInstance()->localDocsChunkSize(),
m_database = new Database(MySettings::globalInstance()->localDocsAutomaticUpdate(),
MySettings::globalInstance()->localDocsChunkSize(),
MySettings::globalInstance()->localDocsFileExtensions());
connect(this, &LocalDocs::requestStart, m_database,

View File

@ -121,6 +121,8 @@ QVariant LocalDocsModel::data(const QModelIndex &index, int role) const
return item.embeddingModel;
case UpdatingRole:
return item.indexing || item.currentEmbeddingsToIndex != 0;
case OutOfDateRole:
return item.outOfDate;
}
return QVariant();
@ -149,6 +151,7 @@ QHash<int, QByteArray> LocalDocsModel::roleNames() const
roles[FileCurrentlyProcessingRole] = "fileCurrentlyProcessing";
roles[EmbeddingModelRole] = "embeddingModel";
roles[UpdatingRole] = "updating";
roles[OutOfDateRole] = "outOfDate";
return roles;
}
@ -200,6 +203,8 @@ void LocalDocsModel::updateCollectionItem(const CollectionItem &item)
changed.append(FileCurrentlyProcessingRole);
if (stored.embeddingModel != item.embeddingModel)
changed.append(EmbeddingModelRole);
if (stored.outOfDate != item.outOfDate)
changed.append(OutOfDateRole);
// preserve collection name as we ignore it for matching
QString collection = stored.collection;

View File

@ -70,7 +70,8 @@ public:
LastUpdateRole,
FileCurrentlyProcessingRole,
EmbeddingModelRole,
UpdatingRole
UpdatingRole,
OutOfDateRole
};
explicit LocalDocsModel(QObject *parent = nullptr);

View File

@ -59,6 +59,7 @@ static const QVariantMap basicDefaults {
{ "localdocs/useRemoteEmbed", false },
{ "localdocs/nomicAPIKey", "" },
{ "localdocs/embedDevice", "Auto" },
{ "localdocs/automaticUpdate",false },
{ "network/attribution", "" },
};
@ -224,6 +225,7 @@ void MySettings::restoreLocalDocsDefaults()
setLocalDocsUseRemoteEmbed(basicDefaults.value("localdocs/useRemoteEmbed").toBool());
setLocalDocsNomicAPIKey(basicDefaults.value("localdocs/nomicAPIKey").toString());
setLocalDocsEmbedDevice(basicDefaults.value("localdocs/embedDevice").toString());
setLocalDocsAutomaticUpdate(basicDefaults.value("localdocs/automaticUpdate").toBool());
}
void MySettings::eraseModel(const ModelInfo &info)
@ -455,6 +457,7 @@ QStringList MySettings::localDocsFileExtensions() const { return getBasicSetting
bool MySettings::localDocsUseRemoteEmbed() const { return getBasicSetting("localdocs/useRemoteEmbed").toBool(); }
QString MySettings::localDocsNomicAPIKey() const { return getBasicSetting("localdocs/nomicAPIKey" ).toString(); }
QString MySettings::localDocsEmbedDevice() const { return getBasicSetting("localdocs/embedDevice" ).toString(); }
bool MySettings::localDocsAutomaticUpdate() const{ return getBasicSetting("localdocs/automaticUpdate").toBool(); }
QString MySettings::networkAttribution() const { return getBasicSetting("network/attribution" ).toString(); }
ChatTheme MySettings::chatTheme() const { return ChatTheme (getEnumSetting("chatTheme", chatThemeNames)); }
@ -473,6 +476,7 @@ void MySettings::setLocalDocsFileExtensions(const QStringList &value) { setBasic
void MySettings::setLocalDocsUseRemoteEmbed(bool value) { setBasicSetting("localdocs/useRemoteEmbed", value, "localDocsUseRemoteEmbed"); }
void MySettings::setLocalDocsNomicAPIKey(const QString &value) { setBasicSetting("localdocs/nomicAPIKey", value, "localDocsNomicAPIKey"); }
void MySettings::setLocalDocsEmbedDevice(const QString &value) { setBasicSetting("localdocs/embedDevice", value, "localDocsEmbedDevice"); }
void MySettings::setLocalDocsAutomaticUpdate(bool value) { setBasicSetting("localdocs/automaticUpdate",value, "localDocsAutomaticUpdate"); }
void MySettings::setNetworkAttribution(const QString &value) { setBasicSetting("network/attribution", value, "networkAttribution"); }
void MySettings::setChatTheme(ChatTheme value) { setBasicSetting("chatTheme", chatThemeNames .value(int(value))); }

View File

@ -64,6 +64,7 @@ class MySettings : public QObject
Q_PROPERTY(bool localDocsUseRemoteEmbed READ localDocsUseRemoteEmbed WRITE setLocalDocsUseRemoteEmbed NOTIFY localDocsUseRemoteEmbedChanged)
Q_PROPERTY(QString localDocsNomicAPIKey READ localDocsNomicAPIKey WRITE setLocalDocsNomicAPIKey NOTIFY localDocsNomicAPIKeyChanged)
Q_PROPERTY(QString localDocsEmbedDevice READ localDocsEmbedDevice WRITE setLocalDocsEmbedDevice NOTIFY localDocsEmbedDeviceChanged)
Q_PROPERTY(bool localDocsAutomaticUpdate READ localDocsAutomaticUpdate WRITE setLocalDocsAutomaticUpdate NOTIFY localDocsAutomaticUpdateChanged)
Q_PROPERTY(QString networkAttribution READ networkAttribution WRITE setNetworkAttribution NOTIFY networkAttributionChanged)
Q_PROPERTY(bool networkIsActive READ networkIsActive WRITE setNetworkIsActive NOTIFY networkIsActiveChanged)
Q_PROPERTY(bool networkUsageStatsActive READ networkUsageStatsActive WRITE setNetworkUsageStatsActive NOTIFY networkUsageStatsActiveChanged)
@ -185,6 +186,8 @@ public:
void setLocalDocsNomicAPIKey(const QString &value);
QString localDocsEmbedDevice() const;
void setLocalDocsEmbedDevice(const QString &value);
bool localDocsAutomaticUpdate() const;
void setLocalDocsAutomaticUpdate(bool value);
// Network settings
QString networkAttribution() const;
@ -232,6 +235,7 @@ Q_SIGNALS:
void localDocsUseRemoteEmbedChanged();
void localDocsNomicAPIKeyChanged();
void localDocsEmbedDeviceChanged();
void localDocsAutomaticUpdateChanged();
void networkAttributionChanged();
void networkIsActiveChanged();
void networkPortChanged();