Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-09-07 11:30:05 +00:00)
implement local Nomic Embed via llama.cpp (#2086)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
@@ -27,7 +27,7 @@ void EmbeddingLLMWorker::wait()
 
 bool EmbeddingLLMWorker::loadModel()
 {
-    const EmbeddingModels *embeddingModels = ModelList::globalInstance()->embeddingModels();
+    const EmbeddingModels *embeddingModels = ModelList::globalInstance()->installedEmbeddingModels();
     if (!embeddingModels->count())
         return false;
 
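The switch to installedEmbeddingModels() appears to restrict the lookup to embedding models that are actually present on disk, so loadModel() returns early instead of trying to construct a model that has not been downloaded yet. A sketch of that guard, assuming installedEmbeddingModels() filters on installed status:

    const EmbeddingModels *embeddingModels = ModelList::globalInstance()->installedEmbeddingModels();
    if (!embeddingModels->count())
        return false; // no embedding model installed yet, nothing to load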
@@ -41,7 +41,8 @@ bool EmbeddingLLMWorker::loadModel()
         return false;
     }
 
-    bool isNomic = fileInfo.fileName().startsWith("nomic");
+    auto filename = fileInfo.fileName();
+    bool isNomic = filename.startsWith("nomic-") && filename.endsWith(".txt");
     if (isNomic) {
         QFile file(filePath);
         file.open(QIODeviceBase::ReadOnly | QIODeviceBase::Text);
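Judging from the lines that follow (the matching file is opened as text, and isNomic() later checks m_nomicAPIKey), a "nomic-*.txt" entry appears to be a plain-text file holding an Atlas API key, while anything else is treated as a local model file loaded through llama.cpp. A rough sketch of that dispatch, with purely illustrative file names:

    auto filename = QFileInfo(filePath).fileName();
    if (filename.startsWith("nomic-") && filename.endsWith(".txt")) {
        // e.g. "nomic-embed-text-v1.txt": remote Atlas embedding, key read from the file
    } else {
        // e.g. a local GGUF model file, constructed via LLModel below
    }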
@@ -52,16 +53,18 @@ bool EmbeddingLLMWorker::loadModel()
     }
 
     m_model = LLModel::Implementation::construct(filePath.toStdString());
+    // NOTE: explicitly loads model on CPU to avoid GPU OOM
+    // TODO(cebtenzzre): support GPU-accelerated embeddings
     bool success = m_model->loadModel(filePath.toStdString(), 2048, 0);
     if (!success) {
-        qWarning() << "WARNING: Could not load sbert";
+        qWarning() << "WARNING: Could not load embedding model";
         delete m_model;
         m_model = nullptr;
         return false;
     }
 
-    if (m_model->implementation().modelType() != "Bert") {
-        qWarning() << "WARNING: Model type is not sbert";
+    if (!m_model->supportsEmbedding()) {
+        qWarning() << "WARNING: Model type does not support embeddings";
         delete m_model;
         m_model = nullptr;
         return false;
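The new NOTE refers to the trailing arguments of loadModel(). Assuming the LLModel interface takes (modelPath, n_ctx, ngl) here, the call requests a 2048-token context and offloads zero layers to the GPU, which is what keeps the embedding model on the CPU until the TODO above is addressed:

    // Assumed parameter meanings; the names are not part of the diff.
    bool success = m_model->loadModel(filePath.toStdString(), /*n_ctx*/ 2048, /*ngl*/ 0);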
@@ -79,21 +82,49 @@ bool EmbeddingLLMWorker::isNomic() const
     return !m_nomicAPIKey.isEmpty();
 }
 
+// this function is always called for retrieval tasks
 std::vector<float> EmbeddingLLMWorker::generateSyncEmbedding(const QString &text)
 {
     if (!hasModel() && !loadModel()) {
         qWarning() << "WARNING: Could not load model for embeddings";
-        return std::vector<float>();
+        return {};
     }
 
     if (isNomic()) {
         qWarning() << "WARNING: Request to generate sync embeddings for non-local model invalid";
-        return std::vector<float>();
+        return {};
     }
 
-    return m_model->embedding(text.toStdString());
+    std::vector<float> embedding(m_model->embeddingSize());
+    try {
+        m_model->embed({text.toStdString()}, embedding.data(), true);
+    } catch (const std::exception &e) {
+        qWarning() << "WARNING: LLModel::embed failed: " << e.what();
+        return {};
+    }
+    return embedding;
 }
 
+void EmbeddingLLMWorker::sendAtlasRequest(const QStringList &texts, const QString &taskType, QVariant userData) {
+    QJsonObject root;
+    root.insert("model", "nomic-embed-text-v1");
+    root.insert("texts", QJsonArray::fromStringList(texts));
+    root.insert("task_type", taskType);
+
+    QJsonDocument doc(root);
+
+    QUrl nomicUrl("https://api-atlas.nomic.ai/v1/embedding/text");
+    const QString authorization = QString("Bearer %1").arg(m_nomicAPIKey).trimmed();
+    QNetworkRequest request(nomicUrl);
+    request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
+    request.setRawHeader("Authorization", authorization.toUtf8());
+    request.setAttribute(QNetworkRequest::User, userData);
+    QNetworkReply *reply = m_networkManager->post(request, doc.toJson(QJsonDocument::Compact));
+    connect(qApp, &QCoreApplication::aboutToQuit, reply, &QNetworkReply::abort);
+    connect(reply, &QNetworkReply::finished, this, &EmbeddingLLMWorker::handleFinished);
+}
+
+// this function is always called for retrieval tasks
 void EmbeddingLLMWorker::requestSyncEmbedding(const QString &text)
 {
     if (!hasModel() && !loadModel()) {
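The new sendAtlasRequest() helper centralizes the POST to https://api-atlas.nomic.ai/v1/embedding/text that was previously written out in both request paths; the remaining hunks shrink those call sites to single lines, roughly:

    sendAtlasRequest({text}, "search_query");                                 // retrieval query
    sendAtlasRequest(texts, "search_document", QVariant::fromValue(chunks));  // chunks to store, attached as
                                                                              // user data for the reply handler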
@@ -108,25 +139,10 @@ void EmbeddingLLMWorker::requestSyncEmbedding(const QString &text)
 
     Q_ASSERT(hasModel());
 
-    QJsonObject root;
-    root.insert("model", "nomic-embed-text-v1");
-    QJsonArray texts;
-    texts.append(text);
-    root.insert("texts", texts);
-    root.insert("task_type", "search_query");
-
-    QJsonDocument doc(root);
-
-    QUrl nomicUrl("https://api-atlas.nomic.ai/v1/embedding/text");
-    const QString authorization = QString("Bearer %1").arg(m_nomicAPIKey).trimmed();
-    QNetworkRequest request(nomicUrl);
-    request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
-    request.setRawHeader("Authorization", authorization.toUtf8());
-    QNetworkReply *reply = m_networkManager->post(request, doc.toJson(QJsonDocument::Compact));
-    connect(qApp, &QCoreApplication::aboutToQuit, reply, &QNetworkReply::abort);
-    connect(reply, &QNetworkReply::finished, this, &EmbeddingLLMWorker::handleFinished);
+    sendAtlasRequest({text}, "search_query");
 }
 
+// this function is always called for storage into the database
 void EmbeddingLLMWorker::requestAsyncEmbedding(const QVector<EmbeddingChunk> &chunks)
 {
     if (!hasModel() && !loadModel()) {
@@ -141,33 +157,24 @@ void EmbeddingLLMWorker::requestAsyncEmbedding(const QVector<EmbeddingChunk> &ch
             EmbeddingResult result;
             result.folder_id = c.folder_id;
             result.chunk_id = c.chunk_id;
-            result.embedding = m_model->embedding(c.chunk.toStdString());
+            // TODO(cebtenzzre): take advantage of batched embeddings
+            result.embedding.resize(m_model->embeddingSize());
+            try {
+                m_model->embed({c.chunk.toStdString()}, result.embedding.data(), false);
+            } catch (const std::exception &e) {
+                qWarning() << "WARNING: LLModel::embed failed:" << e.what();
+                return;
+            }
             results << result;
         }
         emit embeddingsGenerated(results);
         return;
     };
 
-    QJsonObject root;
-    root.insert("model", "nomic-embed-text-v1");
-    QJsonArray texts;
-
-    for (auto c : chunks)
+    QStringList texts;
+    for (auto &c: chunks)
         texts.append(c.chunk);
-    root.insert("texts", texts);
-
-    QJsonDocument doc(root);
-
-    QUrl nomicUrl("https://api-atlas.nomic.ai/v1/embedding/text");
-    const QString authorization = QString("Bearer %1").arg(m_nomicAPIKey).trimmed();
-    QNetworkRequest request(nomicUrl);
-    request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
-    request.setRawHeader("Authorization", authorization.toUtf8());
-    request.setAttribute(QNetworkRequest::User, QVariant::fromValue(chunks));
-
-    QNetworkReply *reply = m_networkManager->post(request, doc.toJson(QJsonDocument::Compact));
-    connect(qApp, &QCoreApplication::aboutToQuit, reply, &QNetworkReply::abort);
-    connect(reply, &QNetworkReply::finished, this, &EmbeddingLLMWorker::handleFinished);
+    sendAtlasRequest(texts, "search_document", QVariant::fromValue(chunks));
 }
 
 std::vector<float> jsonArrayToVector(const QJsonArray &jsonArray) {
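Taken together, the local path mirrors the two Atlas task types: generateSyncEmbedding() passes true as the last argument of embed() for retrieval queries, and requestAsyncEmbedding() passes false for documents being stored, matching the "search_query" and "search_document" task types sent to the remote API. A minimal sketch, assuming the new signature is embed(texts, out, isRetrieval) and that out must hold embeddingSize() floats per input text:

    // Illustrative strings; only the call shape is taken from the diff.
    std::vector<float> queryEmb(m_model->embeddingSize());
    std::vector<float> docEmb(m_model->embeddingSize());
    m_model->embed({std::string("how do I enable the local server?")}, queryEmb.data(), true);        // query side
    m_model->embed({std::string("The local server is enabled in Settings.")}, docEmb.data(), false);  // document side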