mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-07-19 09:46:58 +00:00
database: fix DocumentInfo stack-use-after-return
`info` is local to scanQueue, so we have to store a copy in the DocumentReader to extend its lifetime. Not sure how this even worked before. Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
26356f872e
commit
3acbfcf92c
@ -1111,9 +1111,9 @@ class DocumentReader {
|
||||
public:
|
||||
struct Metadata { QString title, author, subject, keywords; };
|
||||
|
||||
static std::unique_ptr<DocumentReader> fromDocument(const DocumentInfo &info);
|
||||
static std::unique_ptr<DocumentReader> fromDocument(DocumentInfo info);
|
||||
|
||||
const DocumentInfo &doc () const { return *m_info; }
|
||||
const DocumentInfo &doc () const { return m_info; }
|
||||
const Metadata &metadata() const { return m_metadata; }
|
||||
const std::optional<QString> &word () const { return m_word; }
|
||||
const std::optional<QString> &nextWord() { m_word = advance(); return m_word; }
|
||||
@ -1123,8 +1123,8 @@ public:
|
||||
virtual ~DocumentReader() = default;
|
||||
|
||||
protected:
|
||||
explicit DocumentReader(const DocumentInfo &info)
|
||||
: m_info(&info) {}
|
||||
explicit DocumentReader(DocumentInfo info)
|
||||
: m_info(std::move(info)) {}
|
||||
|
||||
void postInit(Metadata &&metadata = {})
|
||||
{
|
||||
@ -1134,7 +1134,7 @@ protected:
|
||||
|
||||
virtual std::optional<QString> advance() = 0;
|
||||
|
||||
const DocumentInfo *m_info;
|
||||
DocumentInfo m_info;
|
||||
Metadata m_metadata;
|
||||
std::optional<QString> m_word;
|
||||
};
|
||||
@ -1144,8 +1144,8 @@ namespace {
|
||||
#ifdef GPT4ALL_USE_QTPDF
|
||||
class PdfDocumentReader final : public DocumentReader {
|
||||
public:
|
||||
explicit PdfDocumentReader(const DocumentInfo &info)
|
||||
: DocumentReader(info)
|
||||
explicit PdfDocumentReader(DocumentInfo info)
|
||||
: DocumentReader(std::move(info))
|
||||
{
|
||||
QString path = info.file.canonicalFilePath();
|
||||
if (m_doc.load(path) != QPdfDocument::Error::None)
|
||||
@ -1185,8 +1185,8 @@ private:
|
||||
#else
|
||||
class PdfDocumentReader final : public DocumentReader {
|
||||
public:
|
||||
explicit PdfDocumentReader(const DocumentInfo &info)
|
||||
: DocumentReader(info)
|
||||
explicit PdfDocumentReader(DocumentInfo info)
|
||||
: DocumentReader(std::move(info))
|
||||
{
|
||||
QString path = info.file.canonicalFilePath();
|
||||
m_doc = FPDF_LoadDocument(path.toUtf8().constData(), nullptr);
|
||||
@ -1277,8 +1277,8 @@ private:
|
||||
|
||||
class WordDocumentReader final : public DocumentReader {
|
||||
public:
|
||||
explicit WordDocumentReader(const DocumentInfo &info)
|
||||
: DocumentReader(info)
|
||||
explicit WordDocumentReader(DocumentInfo info)
|
||||
: DocumentReader(std::move(info))
|
||||
, m_doc(info.file.canonicalFilePath().toStdString())
|
||||
{
|
||||
m_doc.open();
|
||||
@ -1370,8 +1370,8 @@ protected:
|
||||
|
||||
class TxtDocumentReader final : public DocumentReader {
|
||||
public:
|
||||
explicit TxtDocumentReader(const DocumentInfo &info)
|
||||
: DocumentReader(info)
|
||||
explicit TxtDocumentReader(DocumentInfo info)
|
||||
: DocumentReader(std::move(info))
|
||||
, m_file(info.file.canonicalFilePath())
|
||||
{
|
||||
if (!m_file.open(QIODevice::ReadOnly))
|
||||
@ -1412,13 +1412,13 @@ protected:
|
||||
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<DocumentReader> DocumentReader::fromDocument(const DocumentInfo &doc)
|
||||
std::unique_ptr<DocumentReader> DocumentReader::fromDocument(DocumentInfo doc)
|
||||
{
|
||||
if (doc.isPdf())
|
||||
return std::make_unique<PdfDocumentReader>(doc);
|
||||
return std::make_unique<PdfDocumentReader>(std::move(doc));
|
||||
if (doc.isDocx())
|
||||
return std::make_unique<WordDocumentReader>(doc);
|
||||
return std::make_unique<TxtDocumentReader>(doc);
|
||||
return std::make_unique<WordDocumentReader>(std::move(doc));
|
||||
return std::make_unique<TxtDocumentReader>(std::move(doc));
|
||||
}
|
||||
|
||||
ChunkStreamer::ChunkStreamer(Database *database)
|
||||
@ -1426,12 +1426,12 @@ ChunkStreamer::ChunkStreamer(Database *database)
|
||||
|
||||
ChunkStreamer::~ChunkStreamer() = default;
|
||||
|
||||
void ChunkStreamer::setDocument(const DocumentInfo &doc, int documentId, const QString &embeddingModel)
|
||||
void ChunkStreamer::setDocument(DocumentInfo doc, int documentId, const QString &embeddingModel)
|
||||
{
|
||||
auto docKey = doc.key();
|
||||
if (!m_docKey || *m_docKey != docKey) {
|
||||
m_docKey = docKey;
|
||||
m_reader = DocumentReader::fromDocument(doc);
|
||||
m_reader = DocumentReader::fromDocument(std::move(doc));
|
||||
m_documentId = documentId;
|
||||
m_embeddingModel = embeddingModel;
|
||||
m_chunk.clear();
|
||||
@ -1441,7 +1441,8 @@ void ChunkStreamer::setDocument(const DocumentInfo &doc, int documentId, const Q
|
||||
if (m_database->m_documentIdCache.contains(documentId)) {
|
||||
QSqlQuery q(m_database->m_db);
|
||||
if (!m_database->removeChunksByDocumentId(q, documentId))
|
||||
handleDocumentError("ERROR: Cannot remove chunks of document", documentId, doc.file.canonicalPath(), q.lastError());
|
||||
handleDocumentError("ERROR: Cannot remove chunks of document",
|
||||
documentId, m_reader->doc().file.canonicalPath(), q.lastError());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -171,7 +171,7 @@ public:
|
||||
explicit ChunkStreamer(Database *database);
|
||||
~ChunkStreamer();
|
||||
|
||||
void setDocument(const DocumentInfo &doc, int documentId, const QString &embeddingModel);
|
||||
void setDocument(DocumentInfo doc, int documentId, const QString &embeddingModel);
|
||||
std::optional<DocumentInfo::key_type> currentDocKey() const;
|
||||
void reset();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user