Add Nomic Embed model for Atlas with LocalDocs.

Author: Adam Treat
Date:   2024-01-22 12:36:01 -05:00
Parent: eadc3b8d80
Commit: d14b95f4bd

15 changed files with 506 additions and 78 deletions
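
In outline, the patch generalizes the ChatGPT-specific model flags (ChatGPTRole, isChatGPT) into online-model flags (OnlineRole, isOnline), teaches the directory scanner to recognize nomic-*.txt stub files alongside chatgpt-*.txt ones, and registers a built-in "Nomic Embed" entry that LocalDocs can use to embed documents via Nomic Atlas. A rough sketch of the filename convention the scanner now relies on (the helper isOnlineModelStub below is illustrative and not part of the patch):

#include <QString>

// Illustrative helper mirroring the checks in updateModelsFromDirectory():
// online models are represented by small .txt stub files whose names start
// with "chatgpt-" or "nomic-"; local models are .bin/.gguf weight files.
static bool isOnlineModelStub(const QString &filename)
{
    return filename.endsWith(".txt")
        && (filename.startsWith("chatgpt-") || filename.startsWith("nomic-"));
}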


@@ -10,6 +10,7 @@
 //#define USE_LOCAL_MODELSJSON
 #define DEFAULT_EMBEDDING_MODEL "all-MiniLM-L6-v2-f16.gguf"
+#define NOMIC_EMBEDDING_MODEL "nomic-embed-text-v1.txt"
 QString ModelInfo::id() const
 {
@@ -202,7 +203,8 @@ bool EmbeddingModels::filterAcceptsRow(int sourceRow,
 {
     QModelIndex index = sourceModel()->index(sourceRow, 0, sourceParent);
     bool isInstalled = sourceModel()->data(index, ModelList::InstalledRole).toBool();
-    bool isEmbedding = sourceModel()->data(index, ModelList::FilenameRole).toString() == DEFAULT_EMBEDDING_MODEL;
+    bool isEmbedding = sourceModel()->data(index, ModelList::FilenameRole).toString() == DEFAULT_EMBEDDING_MODEL ||
+        sourceModel()->data(index, ModelList::FilenameRole).toString() == NOMIC_EMBEDDING_MODEL;
     return isInstalled && isEmbedding;
 }
@@ -405,7 +407,7 @@ ModelInfo ModelList::defaultModelInfo() const
         const size_t ramrequired = defaultModel->ramrequired;
         // If we don't have either setting, then just use the first model that requires less than 16GB that is installed
-        if (!hasUserDefaultName && !info->isChatGPT && ramrequired > 0 && ramrequired < 16)
+        if (!hasUserDefaultName && !info->isOnline && ramrequired > 0 && ramrequired < 16)
             break;
         // If we have a user specified default and match, then use it
@@ -526,8 +528,8 @@ QVariant ModelList::dataInternal(const ModelInfo *info, int role) const
         return info->installed;
     case DefaultRole:
         return info->isDefault;
-    case ChatGPTRole:
-        return info->isChatGPT;
+    case OnlineRole:
+        return info->isOnline;
     case DisableGUIRole:
         return info->disableGUI;
     case DescriptionRole:
@@ -655,8 +657,8 @@ void ModelList::updateData(const QString &id, int role, const QVariant &value)
         info->installed = value.toBool(); break;
     case DefaultRole:
         info->isDefault = value.toBool(); break;
-    case ChatGPTRole:
-        info->isChatGPT = value.toBool(); break;
+    case OnlineRole:
+        info->isOnline = value.toBool(); break;
     case DisableGUIRole:
         info->disableGUI = value.toBool(); break;
     case DescriptionRole:
@@ -791,7 +793,7 @@ QString ModelList::clone(const ModelInfo &model)
     updateData(id, ModelList::FilenameRole, model.filename());
     updateData(id, ModelList::DirpathRole, model.dirpath);
     updateData(id, ModelList::InstalledRole, model.installed);
-    updateData(id, ModelList::ChatGPTRole, model.isChatGPT);
+    updateData(id, ModelList::OnlineRole, model.isOnline);
     updateData(id, ModelList::TemperatureRole, model.temperature());
     updateData(id, ModelList::TopPRole, model.topP());
     updateData(id, ModelList::TopKRole, model.topK());
@@ -873,10 +875,10 @@ QString ModelList::uniqueModelName(const ModelInfo &model) const
     return baseName;
 }
-QString ModelList::modelDirPath(const QString &modelName, bool isChatGPT)
+QString ModelList::modelDirPath(const QString &modelName, bool isOnline)
 {
     QVector<QString> possibleFilePaths;
-    if (isChatGPT)
+    if (isOnline)
         possibleFilePaths << "/" + modelName + ".txt";
     else {
         possibleFilePaths << "/ggml-" + modelName + ".bin";
@@ -911,7 +913,7 @@ void ModelList::updateModelsFromDirectory()
             // All files that end with .bin and have 'ggml' somewhere in the name
             if (((filename.endsWith(".bin") || filename.endsWith(".gguf")) && (/*filename.contains("ggml") ||*/ filename.contains("gguf")) && !filename.startsWith("incomplete"))
-                || (filename.endsWith(".txt") && filename.startsWith("chatgpt-"))) {
+                || (filename.endsWith(".txt") && (filename.startsWith("chatgpt-") || filename.startsWith("nomic-")))) {
                 QString filePath = it.filePath();
                 QFileInfo info(filePath);
@@ -934,7 +936,8 @@ void ModelList::updateModelsFromDirectory()
                 for (const QString &id : modelsById) {
                     updateData(id, FilenameRole, filename);
-                    updateData(id, ChatGPTRole, filename.startsWith("chatgpt-"));
+                    // FIXME: We should change this to use a consistent filename for online models
+                    updateData(id, OnlineRole, filename.startsWith("chatgpt-") || filename.startsWith("nomic-"));
                     updateData(id, DirpathRole, info.dir().absolutePath() + "/");
                     updateData(id, FilesizeRole, toFileSize(info.size()));
                 }
@@ -1195,7 +1198,7 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
         updateData(id, ModelList::NameRole, modelName);
         updateData(id, ModelList::FilenameRole, modelFilename);
         updateData(id, ModelList::FilesizeRole, "minimal");
-        updateData(id, ModelList::ChatGPTRole, true);
+        updateData(id, ModelList::OnlineRole, true);
         updateData(id, ModelList::DescriptionRole,
             tr("<strong>OpenAI's ChatGPT model GPT-3.5 Turbo</strong><br>") + chatGPTDesc);
         updateData(id, ModelList::RequiresVersionRole, "2.4.2");
@@ -1219,7 +1222,7 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
         updateData(id, ModelList::NameRole, modelName);
         updateData(id, ModelList::FilenameRole, modelFilename);
         updateData(id, ModelList::FilesizeRole, "minimal");
-        updateData(id, ModelList::ChatGPTRole, true);
+        updateData(id, ModelList::OnlineRole, true);
         updateData(id, ModelList::DescriptionRole,
             tr("<strong>OpenAI's ChatGPT model GPT-4</strong><br>") + chatGPTDesc + chatGPT4Warn);
         updateData(id, ModelList::RequiresVersionRole, "2.4.2");
@@ -1229,6 +1232,34 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
         updateData(id, ModelList::QuantRole, "NA");
         updateData(id, ModelList::TypeRole, "GPT");
     }
+    {
+        const QString nomicEmbedDesc = tr("<ul><li>For use with LocalDocs feature</li>"
+            "<li>Used for retrieval augmented generation (RAG)</li>"
+            "<li>Requires personal Nomic API key.</li>"
+            "<li>WARNING: Will send your localdocs to Nomic Atlas!</li>"
+            "<li>You can apply for an API key <a href=\"https://atlas.nomic.ai/\">with Nomic Atlas.</a></li></ul>");
+        const QString modelName = "Nomic Embed";
+        const QString id = modelName;
+        const QString modelFilename = "nomic-embed-text-v1.txt";
+        if (contains(modelFilename))
+            changeId(modelFilename, id);
+        if (!contains(id))
+            addModel(id);
+        updateData(id, ModelList::NameRole, modelName);
+        updateData(id, ModelList::FilenameRole, modelFilename);
+        updateData(id, ModelList::FilesizeRole, "minimal");
+        updateData(id, ModelList::OnlineRole, true);
+        updateData(id, ModelList::DisableGUIRole, true);
+        updateData(id, ModelList::DescriptionRole,
+            tr("<strong>LocalDocs Nomic Atlas Embed</strong><br>") + nomicEmbedDesc);
+        updateData(id, ModelList::RequiresVersionRole, "2.6.3");
+        updateData(id, ModelList::OrderRole, "na");
+        updateData(id, ModelList::RamrequiredRole, 0);
+        updateData(id, ModelList::ParametersRole, "?");
+        updateData(id, ModelList::QuantRole, "NA");
+        updateData(id, ModelList::TypeRole, "Bert");
+    }
 }
 void ModelList::updateModelsFromSettings()
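
The net effect for LocalDocs is that the embedding-model picker (EmbeddingModels::filterAcceptsRow above) now accepts an installed model whose filename is either the bundled all-MiniLM-L6-v2 file or the new Nomic stub. A condensed sketch of that predicate, assuming the two #define values from the top of the file (the helper name acceptsEmbeddingModel is illustrative only, not part of the patch):

#include <QString>

// Illustrative predicate equivalent to the updated filter: a row passes when
// the model is installed and its filename matches DEFAULT_EMBEDDING_MODEL or
// NOMIC_EMBEDDING_MODEL.
static bool acceptsEmbeddingModel(const QString &filename, bool isInstalled)
{
    const bool isEmbedding =
        filename == QStringLiteral("all-MiniLM-L6-v2-f16.gguf")   // DEFAULT_EMBEDDING_MODEL
        || filename == QStringLiteral("nomic-embed-text-v1.txt"); // NOMIC_EMBEDDING_MODEL
    return isInstalled && isEmbedding;
}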