From e47f4ddfb6f65d1503a6d2d6d194c65bdd910928 Mon Sep 17 00:00:00 2001
From: Adam Treat
Date: Sat, 20 May 2023 20:04:55 -0400
Subject: [PATCH] Specify a large number of suffixes we will search for now.

---
 gpt4all-chat/localdocs.cpp | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/gpt4all-chat/localdocs.cpp b/gpt4all-chat/localdocs.cpp
index 8527fa07..6ad63046 100644
--- a/gpt4all-chat/localdocs.cpp
+++ b/gpt4all-chat/localdocs.cpp
@@ -614,6 +614,16 @@ void Database::scanDocuments(int folder_id, const QString &folder_path)
     qDebug() << "scanning folder for documents" << folder_path;
 #endif
 
+    static const QList<QString> extensions { "txt", "doc", "docx", "pdf", "rtf", "odt", "html", "htm",
+        "xls", "xlsx", "csv", "ods", "ppt", "pptx", "odp", "xml", "json", "log", "md", "tex", "asc", "wks",
+        "wpd", "wps", "wri", "xhtml", "xht", "xslt", "yaml", "yml", "dtd", "sgml", "tsv", "strings", "resx",
+        "plist", "properties", "ini", "config", "bat", "sh", "ps1", "cmd", "awk", "sed", "vbs", "ics", "mht",
+        "mhtml", "epub", "djvu", "azw", "azw3", "mobi", "fb2", "prc", "lit", "lrf", "tcr", "pdb", "oxps",
+        "xps", "pages", "numbers", "key", "keynote", "abw", "zabw", "123", "wk1", "wk3", "wk4", "wk5", "wq1",
+        "wq2", "xlw", "xlr", "dif", "slk", "sylk", "wb1", "wb2", "wb3", "qpw", "wdb", "wks", "wku", "wr1",
+        "wrk", "xlk", "xlt", "xltm", "xltx", "xlsm", "xla", "xlam", "xll", "xld", "xlv", "xlw", "xlc", "xlm",
+        "xlt", "xln" };
+
     QDir dir(folder_path);
     Q_ASSERT(dir.exists());
     Q_ASSERT(dir.isReadable());
@@ -626,10 +636,8 @@ void Database::scanDocuments(int folder_id, const QString &folder_path)
             continue;
         }
 
-        if (fileInfo.suffix() != QLatin1String("pdf")
-            && fileInfo.suffix() != QLatin1String("txt")) {
+        if (!extensions.contains(fileInfo.suffix()))
             continue;
-        }
 
         DocumentInfo info;
         info.folder = folder_id;