embllm: fix use of llama ctx before loading (#2465)

This fixes a regression introduced in PR #2396: setThreadCount() was called on the model before loadModel() had created the llama context.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel
Date:   2024-06-25 11:04:01 -04:00
Committed by: GitHub
Parent: 9273b49b62
Commit: 1a00882276


@@ -84,10 +84,6 @@ bool EmbeddingLLMWorker::loadModel()
         return false;
     }
 
-    // FIXME(jared): the user may want this to take effect without having to restart
-    int n_threads = MySettings::globalInstance()->threadCount();
-    m_model->setThreadCount(n_threads);
-
     // NOTE: explicitly loads model on CPU to avoid GPU OOM
     // TODO(cebtenzzre): support GPU-accelerated embeddings
     bool success = m_model->loadModel(filePath.toStdString(), 2048, 0);
@@ -104,6 +100,11 @@ bool EmbeddingLLMWorker::loadModel()
         m_model = nullptr;
         return false;
     }
+
+    // FIXME(jared): the user may want this to take effect without having to restart
+    int n_threads = MySettings::globalInstance()->threadCount();
+    m_model->setThreadCount(n_threads);
+
     return true;
 }
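
For context, here is a minimal, self-contained sketch of the ordering bug this commit fixes. The Model, FakeLlamaCtx, load(), and setThreadCount() names below are illustrative stand-ins, not the actual gpt4all LLModel API; the point is only that setting the thread count dereferences a context that exists solely after a successful load, so the call must follow loadModel().

    #include <cassert>
    #include <memory>
    #include <string>

    // Illustrative stand-in for a llama.cpp-backed model wrapper
    // (hypothetical; not the gpt4all LLModel interface).
    struct FakeLlamaCtx { int n_threads = 0; };

    class Model {
        std::unique_ptr<FakeLlamaCtx> m_ctx; // null until load() succeeds
    public:
        bool load(const std::string & /*path*/) {
            m_ctx = std::make_unique<FakeLlamaCtx>(); // ctx is created here
            return true;
        }
        void setThreadCount(int n) {
            // Using the ctx before load() is exactly the regression:
            assert(m_ctx && "llama ctx used before loading");
            m_ctx->n_threads = n;
        }
    };

    int main() {
        Model model;

        // Pre-fix order: setThreadCount() before load() touches a null ctx.
        // model.setThreadCount(4); // would trip the assert

        // Post-fix order: configure threads only after a successful load.
        if (!model.load("model.gguf"))
            return 1;
        model.setThreadCount(4);
        return 0;
    }

Deferring the call until after the success check, as the second hunk does, keeps every use of the context behind the one place that guarantees it exists. The FIXME about the setting not taking effect without a restart is unchanged by this commit.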