Add Nomic Embed model for Atlas with LocalDocs.

Author: Adam Treat
Date:   2024-01-22 12:36:01 -05:00
Parent: eadc3b8d80
Commit: d14b95f4bd

15 changed files with 506 additions and 78 deletions
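
In outline, the patch generalizes the ChatGPT-specific model flags (ChatGPTRole, isChatGPT) into online-model flags (OnlineRole, isOnline), teaches the directory scanner to recognize nomic-*.txt stub files alongside chatgpt-*.txt ones, and registers a built-in "Nomic Embed" entry that LocalDocs can use to embed documents via Nomic Atlas. A rough sketch of the filename convention the scanner now relies on (the helper isOnlineModelStub below is illustrative and not part of the patch):

#include <QString>

// Illustrative helper mirroring the checks in updateModelsFromDirectory():
// online models are represented by small .txt stub files whose names start
// with "chatgpt-" or "nomic-"; local models are .bin/.gguf weight files.
static bool isOnlineModelStub(const QString &filename)
{
    return filename.endsWith(".txt")
        && (filename.startsWith("chatgpt-") || filename.startsWith("nomic-"));
}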


@@ -10,6 +10,7 @@
 //#define USE_LOCAL_MODELSJSON
 #define DEFAULT_EMBEDDING_MODEL "all-MiniLM-L6-v2-f16.gguf"
+#define NOMIC_EMBEDDING_MODEL "nomic-embed-text-v1.txt"
 QString ModelInfo::id() const
 {
@@ -202,7 +203,8 @@ bool EmbeddingModels::filterAcceptsRow(int sourceRow,
 {
     QModelIndex index = sourceModel()->index(sourceRow, 0, sourceParent);
     bool isInstalled = sourceModel()->data(index, ModelList::InstalledRole).toBool();
-    bool isEmbedding = sourceModel()->data(index, ModelList::FilenameRole).toString() == DEFAULT_EMBEDDING_MODEL;
+    bool isEmbedding = sourceModel()->data(index, ModelList::FilenameRole).toString() == DEFAULT_EMBEDDING_MODEL ||
+        sourceModel()->data(index, ModelList::FilenameRole).toString() == NOMIC_EMBEDDING_MODEL;
     return isInstalled && isEmbedding;
 }
@@ -405,7 +407,7 @@ ModelInfo ModelList::defaultModelInfo() const
         const size_t ramrequired = defaultModel->ramrequired;
         // If we don't have either setting, then just use the first model that requires less than 16GB that is installed
-        if (!hasUserDefaultName && !info->isChatGPT && ramrequired > 0 && ramrequired < 16)
+        if (!hasUserDefaultName && !info->isOnline && ramrequired > 0 && ramrequired < 16)
             break;
         // If we have a user specified default and match, then use it
@@ -526,8 +528,8 @@ QVariant ModelList::dataInternal(const ModelInfo *info, int role) const
         return info->installed;
     case DefaultRole:
         return info->isDefault;
-    case ChatGPTRole:
-        return info->isChatGPT;
+    case OnlineRole:
+        return info->isOnline;
     case DisableGUIRole:
         return info->disableGUI;
     case DescriptionRole:
@@ -655,8 +657,8 @@ void ModelList::updateData(const QString &id, int role, const QVariant &value)
         info->installed = value.toBool(); break;
     case DefaultRole:
         info->isDefault = value.toBool(); break;
-    case ChatGPTRole:
-        info->isChatGPT = value.toBool(); break;
+    case OnlineRole:
+        info->isOnline = value.toBool(); break;
     case DisableGUIRole:
         info->disableGUI = value.toBool(); break;
     case DescriptionRole:
@@ -791,7 +793,7 @@ QString ModelList::clone(const ModelInfo &model)
     updateData(id, ModelList::FilenameRole, model.filename());
     updateData(id, ModelList::DirpathRole, model.dirpath);
     updateData(id, ModelList::InstalledRole, model.installed);
-    updateData(id, ModelList::ChatGPTRole, model.isChatGPT);
+    updateData(id, ModelList::OnlineRole, model.isOnline);
     updateData(id, ModelList::TemperatureRole, model.temperature());
     updateData(id, ModelList::TopPRole, model.topP());
     updateData(id, ModelList::TopKRole, model.topK());
@@ -873,10 +875,10 @@ QString ModelList::uniqueModelName(const ModelInfo &model) const
     return baseName;
 }
-QString ModelList::modelDirPath(const QString &modelName, bool isChatGPT)
+QString ModelList::modelDirPath(const QString &modelName, bool isOnline)
 {
     QVector<QString> possibleFilePaths;
-    if (isChatGPT)
+    if (isOnline)
         possibleFilePaths << "/" + modelName + ".txt";
     else {
         possibleFilePaths << "/ggml-" + modelName + ".bin";
@@ -911,7 +913,7 @@ void ModelList::updateModelsFromDirectory()
             // All files that end with .bin and have 'ggml' somewhere in the name
             if (((filename.endsWith(".bin") || filename.endsWith(".gguf")) && (/*filename.contains("ggml") ||*/ filename.contains("gguf")) && !filename.startsWith("incomplete"))
-                || (filename.endsWith(".txt") && filename.startsWith("chatgpt-"))) {
+                || (filename.endsWith(".txt") && (filename.startsWith("chatgpt-") || filename.startsWith("nomic-")))) {
                 QString filePath = it.filePath();
                 QFileInfo info(filePath);
@@ -934,7 +936,8 @@ void ModelList::updateModelsFromDirectory()
                 for (const QString &id : modelsById) {
                     updateData(id, FilenameRole, filename);
-                    updateData(id, ChatGPTRole, filename.startsWith("chatgpt-"));
+                    // FIXME: We should change this to use a consistent filename for online models
+                    updateData(id, OnlineRole, filename.startsWith("chatgpt-") || filename.startsWith("nomic-"));
                     updateData(id, DirpathRole, info.dir().absolutePath() + "/");
                     updateData(id, FilesizeRole, toFileSize(info.size()));
                 }
@@ -1195,7 +1198,7 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
         updateData(id, ModelList::NameRole, modelName);
         updateData(id, ModelList::FilenameRole, modelFilename);
         updateData(id, ModelList::FilesizeRole, "minimal");
-        updateData(id, ModelList::ChatGPTRole, true);
+        updateData(id, ModelList::OnlineRole, true);
         updateData(id, ModelList::DescriptionRole,
             tr("<strong>OpenAI's ChatGPT model GPT-3.5 Turbo</strong><br>") + chatGPTDesc);
         updateData(id, ModelList::RequiresVersionRole, "2.4.2");
@@ -1219,7 +1222,7 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
         updateData(id, ModelList::NameRole, modelName);
         updateData(id, ModelList::FilenameRole, modelFilename);
         updateData(id, ModelList::FilesizeRole, "minimal");
-        updateData(id, ModelList::ChatGPTRole, true);
+        updateData(id, ModelList::OnlineRole, true);
         updateData(id, ModelList::DescriptionRole,
             tr("<strong>OpenAI's ChatGPT model GPT-4</strong><br>") + chatGPTDesc + chatGPT4Warn);
         updateData(id, ModelList::RequiresVersionRole, "2.4.2");
@@ -1229,6 +1232,34 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
         updateData(id, ModelList::QuantRole, "NA");
         updateData(id, ModelList::TypeRole, "GPT");
     }
+    {
+        const QString nomicEmbedDesc = tr("<ul><li>For use with LocalDocs feature</li>"
+            "<li>Used for retrieval augmented generation (RAG)</li>"
+            "<li>Requires personal Nomic API key.</li>"
+            "<li>WARNING: Will send your localdocs to Nomic Atlas!</li>"
+            "<li>You can apply for an API key <a href=\"https://atlas.nomic.ai/\">with Nomic Atlas.</a></li></ul>");
+        const QString modelName = "Nomic Embed";
+        const QString id = modelName;
+        const QString modelFilename = "nomic-embed-text-v1.txt";
+        if (contains(modelFilename))
+            changeId(modelFilename, id);
+        if (!contains(id))
+            addModel(id);
+        updateData(id, ModelList::NameRole, modelName);
+        updateData(id, ModelList::FilenameRole, modelFilename);
+        updateData(id, ModelList::FilesizeRole, "minimal");
+        updateData(id, ModelList::OnlineRole, true);
+        updateData(id, ModelList::DisableGUIRole, true);
+        updateData(id, ModelList::DescriptionRole,
+            tr("<strong>LocalDocs Nomic Atlas Embed</strong><br>") + nomicEmbedDesc);
+        updateData(id, ModelList::RequiresVersionRole, "2.6.3");
+        updateData(id, ModelList::OrderRole, "na");
+        updateData(id, ModelList::RamrequiredRole, 0);
+        updateData(id, ModelList::ParametersRole, "?");
+        updateData(id, ModelList::QuantRole, "NA");
+        updateData(id, ModelList::TypeRole, "Bert");
+    }
 }
 void ModelList::updateModelsFromSettings()
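
The net effect for LocalDocs is that the embedding-model picker (EmbeddingModels::filterAcceptsRow above) now accepts an installed model whose filename is either the bundled all-MiniLM-L6-v2 file or the new Nomic stub. A condensed sketch of that predicate, assuming the two #define values from the top of the file (the helper name acceptsEmbeddingModel is illustrative only, not part of the patch):

#include <QString>

// Illustrative predicate equivalent to the updated filter: a row passes when
// the model is installed and its filename matches DEFAULT_EMBEDDING_MODEL or
// NOMIC_EMBEDDING_MODEL.
static bool acceptsEmbeddingModel(const QString &filename, bool isInstalled)
{
    const bool isEmbedding =
        filename == QStringLiteral("all-MiniLM-L6-v2-f16.gguf")   // DEFAULT_EMBEDDING_MODEL
        || filename == QStringLiteral("nomic-embed-text-v1.txt"); // NOMIC_EMBEDDING_MODEL
    return isInstalled && isEmbedding;
}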