mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-05 10:30:29 +00:00
LocalDocs version 2 with text embeddings.
This commit is contained in:
@@ -8,10 +8,18 @@
|
||||
#include <QThread>
|
||||
#include <QFileSystemWatcher>
|
||||
|
||||
class Embeddings;
|
||||
class EmbeddingLLM;
|
||||
struct DocumentInfo
|
||||
{
|
||||
int folder;
|
||||
QFileInfo doc;
|
||||
int currentPage = 0;
|
||||
size_t currentPosition = 0;
|
||||
bool currentlyProcessing = false;
|
||||
bool isPdf() const {
|
||||
return doc.suffix() == QLatin1String("pdf");
|
||||
}
|
||||
};
|
||||
|
||||
struct ResultInfo {
|
||||
@@ -30,6 +38,11 @@ struct CollectionItem {
|
||||
QString folder_path;
|
||||
int folder_id = -1;
|
||||
bool installed = false;
|
||||
bool indexing = false;
|
||||
int currentDocsToIndex = 0;
|
||||
int totalDocsToIndex = 0;
|
||||
size_t currentBytesToIndex = 0;
|
||||
size_t totalBytesToIndex = 0;
|
||||
};
|
||||
Q_DECLARE_METATYPE(CollectionItem)
|
||||
|
||||
@@ -38,6 +51,7 @@ class Database : public QObject
|
||||
Q_OBJECT
|
||||
public:
|
||||
Database(int chunkSize);
|
||||
virtual ~Database();
|
||||
|
||||
public Q_SLOTS:
|
||||
void scanQueue();
|
||||
@@ -50,6 +64,16 @@ public Q_SLOTS:
|
||||
|
||||
Q_SIGNALS:
|
||||
void docsToScanChanged();
|
||||
void updateInstalled(int folder_id, bool b);
|
||||
void updateIndexing(int folder_id, bool b);
|
||||
void updateCurrentDocsToIndex(int folder_id, size_t currentDocsToIndex);
|
||||
void updateTotalDocsToIndex(int folder_id, size_t totalDocsToIndex);
|
||||
void subtractCurrentBytesToIndex(int folder_id, size_t subtractedBytes);
|
||||
void updateCurrentBytesToIndex(int folder_id, size_t currentBytesToIndex);
|
||||
void updateTotalBytesToIndex(int folder_id, size_t totalBytesToIndex);
|
||||
void addCollectionItem(const CollectionItem &item);
|
||||
void removeFolderById(int folder_id);
|
||||
void removeCollectionItem(const QString &collectionName);
|
||||
void collectionListUpdated(const QList<CollectionItem> &collectionList);
|
||||
|
||||
private Q_SLOTS:
|
||||
@@ -58,21 +82,31 @@ private Q_SLOTS:
|
||||
bool addFolderToWatch(const QString &path);
|
||||
bool removeFolderFromWatch(const QString &path);
|
||||
void addCurrentFolders();
|
||||
void updateCollectionList();
|
||||
|
||||
private:
|
||||
void removeFolderInternal(const QString &collection, int folder_id, const QString &path);
|
||||
void chunkStream(QTextStream &stream, int document_id, const QString &file,
|
||||
const QString &title, const QString &author, const QString &subject, const QString &keywords, int page);
|
||||
void handleDocumentErrorAndScheduleNext(const QString &errorMessage,
|
||||
size_t chunkStream(QTextStream &stream, int document_id, const QString &file,
|
||||
const QString &title, const QString &author, const QString &subject, const QString &keywords, int page,
|
||||
int maxChunks = -1);
|
||||
void removeEmbeddingsByDocumentId(int document_id);
|
||||
void scheduleNext(int folder_id, size_t countForFolder);
|
||||
void handleDocumentError(const QString &errorMessage,
|
||||
int document_id, const QString &document_path, const QSqlError &error);
|
||||
size_t countOfDocuments(int folder_id) const;
|
||||
size_t countOfBytes(int folder_id) const;
|
||||
DocumentInfo dequeueDocument();
|
||||
void removeFolderFromDocumentQueue(int folder_id);
|
||||
void enqueueDocumentInternal(const DocumentInfo &info, bool prepend = false);
|
||||
void enqueueDocuments(int folder_id, const QVector<DocumentInfo> &infos);
|
||||
|
||||
private:
|
||||
int m_chunkSize;
|
||||
QQueue<DocumentInfo> m_docsToScan;
|
||||
QMap<int, QQueue<DocumentInfo>> m_docsToScan;
|
||||
QList<ResultInfo> m_retrieve;
|
||||
QThread m_dbThread;
|
||||
QFileSystemWatcher *m_watcher;
|
||||
EmbeddingLLM *m_embLLM;
|
||||
Embeddings *m_embeddings;
|
||||
};
|
||||
|
||||
#endif // DATABASE_H
|
||||
|
Reference in New Issue
Block a user