From 3acbfcf92c9d1389052fd768073756347fccffb7 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 4 Feb 2025 12:58:32 -0500 Subject: [PATCH] database: fix DocumentInfo stack-use-after-return `info` is local to scanQueue, so we have to store a copy in the DocumentReader to extend its lifetime. Not sure how this even worked before. Signed-off-by: Jared Van Bortel --- gpt4all-chat/src/database.cpp | 45 ++++++++++++++++++----------------- gpt4all-chat/src/database.h | 2 +- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/gpt4all-chat/src/database.cpp b/gpt4all-chat/src/database.cpp index f118c304..6f9fa5e7 100644 --- a/gpt4all-chat/src/database.cpp +++ b/gpt4all-chat/src/database.cpp @@ -1111,9 +1111,9 @@ class DocumentReader { public: struct Metadata { QString title, author, subject, keywords; }; - static std::unique_ptr fromDocument(const DocumentInfo &info); + static std::unique_ptr fromDocument(DocumentInfo info); - const DocumentInfo &doc () const { return *m_info; } + const DocumentInfo &doc () const { return m_info; } const Metadata &metadata() const { return m_metadata; } const std::optional &word () const { return m_word; } const std::optional &nextWord() { m_word = advance(); return m_word; } @@ -1123,8 +1123,8 @@ public: virtual ~DocumentReader() = default; protected: - explicit DocumentReader(const DocumentInfo &info) - : m_info(&info) {} + explicit DocumentReader(DocumentInfo info) + : m_info(std::move(info)) {} void postInit(Metadata &&metadata = {}) { @@ -1134,9 +1134,9 @@ protected: virtual std::optional advance() = 0; - const DocumentInfo *m_info; - Metadata m_metadata; - std::optional m_word; + DocumentInfo m_info; + Metadata m_metadata; + std::optional m_word; }; namespace { @@ -1144,8 +1144,8 @@ namespace { #ifdef GPT4ALL_USE_QTPDF class PdfDocumentReader final : public DocumentReader { public: - explicit PdfDocumentReader(const DocumentInfo &info) - : DocumentReader(info) + explicit PdfDocumentReader(DocumentInfo info) + : DocumentReader(std::move(info)) { QString path = info.file.canonicalFilePath(); if (m_doc.load(path) != QPdfDocument::Error::None) @@ -1185,8 +1185,8 @@ private: #else class PdfDocumentReader final : public DocumentReader { public: - explicit PdfDocumentReader(const DocumentInfo &info) - : DocumentReader(info) + explicit PdfDocumentReader(DocumentInfo info) + : DocumentReader(std::move(info)) { QString path = info.file.canonicalFilePath(); m_doc = FPDF_LoadDocument(path.toUtf8().constData(), nullptr); @@ -1277,8 +1277,8 @@ private: class WordDocumentReader final : public DocumentReader { public: - explicit WordDocumentReader(const DocumentInfo &info) - : DocumentReader(info) + explicit WordDocumentReader(DocumentInfo info) + : DocumentReader(std::move(info)) , m_doc(info.file.canonicalFilePath().toStdString()) { m_doc.open(); @@ -1370,8 +1370,8 @@ protected: class TxtDocumentReader final : public DocumentReader { public: - explicit TxtDocumentReader(const DocumentInfo &info) - : DocumentReader(info) + explicit TxtDocumentReader(DocumentInfo info) + : DocumentReader(std::move(info)) , m_file(info.file.canonicalFilePath()) { if (!m_file.open(QIODevice::ReadOnly)) @@ -1412,13 +1412,13 @@ protected: } // namespace -std::unique_ptr DocumentReader::fromDocument(const DocumentInfo &doc) +std::unique_ptr DocumentReader::fromDocument(DocumentInfo doc) { if (doc.isPdf()) - return std::make_unique(doc); + return std::make_unique(std::move(doc)); if (doc.isDocx()) - return std::make_unique(doc); - return std::make_unique(doc); + return std::make_unique(std::move(doc)); + return std::make_unique(std::move(doc)); } ChunkStreamer::ChunkStreamer(Database *database) @@ -1426,12 +1426,12 @@ ChunkStreamer::ChunkStreamer(Database *database) ChunkStreamer::~ChunkStreamer() = default; -void ChunkStreamer::setDocument(const DocumentInfo &doc, int documentId, const QString &embeddingModel) +void ChunkStreamer::setDocument(DocumentInfo doc, int documentId, const QString &embeddingModel) { auto docKey = doc.key(); if (!m_docKey || *m_docKey != docKey) { m_docKey = docKey; - m_reader = DocumentReader::fromDocument(doc); + m_reader = DocumentReader::fromDocument(std::move(doc)); m_documentId = documentId; m_embeddingModel = embeddingModel; m_chunk.clear(); @@ -1441,7 +1441,8 @@ void ChunkStreamer::setDocument(const DocumentInfo &doc, int documentId, const Q if (m_database->m_documentIdCache.contains(documentId)) { QSqlQuery q(m_database->m_db); if (!m_database->removeChunksByDocumentId(q, documentId)) - handleDocumentError("ERROR: Cannot remove chunks of document", documentId, doc.file.canonicalPath(), q.lastError()); + handleDocumentError("ERROR: Cannot remove chunks of document", + documentId, m_reader->doc().file.canonicalPath(), q.lastError()); } } } diff --git a/gpt4all-chat/src/database.h b/gpt4all-chat/src/database.h index 30c13942..45924b36 100644 --- a/gpt4all-chat/src/database.h +++ b/gpt4all-chat/src/database.h @@ -171,7 +171,7 @@ public: explicit ChunkStreamer(Database *database); ~ChunkStreamer(); - void setDocument(const DocumentInfo &doc, int documentId, const QString &embeddingModel); + void setDocument(DocumentInfo doc, int documentId, const QString &embeddingModel); std::optional currentDocKey() const; void reset();