expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
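
The "dynamically limit" part of this change amounts to clamping the GPU layers and context length settings fields to per-model maximums queried from the backend. A minimal sketch of that clamping, with hypothetical limit values standing in for whatever the backend reports (the struct and function names below are illustrative, not from this commit):

#include <algorithm>
#include <cstdint>

// Hypothetical per-model limits reported by the backend.
struct ModelLimits {
    int32_t maxContextLength; // largest context the model supports
    int32_t layerCount;       // total layers; an upper bound for GPU offload
};

// Clamp the user-facing settings into the supported range.
// The lower bounds here are illustrative.
int32_t clampContextLength(int32_t requested, const ModelLimits &m) {
    return std::clamp(requested, 8, m.maxContextLength);
}

int32_t clampGpuLayers(int32_t requested, const ModelLimits &m) {
    return std::clamp(requested, 0, m.layerCount);
}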
Authored by Jared Van Bortel on 2024-01-31 14:17:44 -05:00; committed by GitHub.
parent f549d5a70a
commit 061d1969f8
31 changed files with 381 additions and 157 deletions

@@ -30,7 +30,7 @@ bool EmbeddingLLM::loadModel()
     }
 
     m_model = LLModel::Implementation::construct(filePath.toStdString());
-    bool success = m_model->loadModel(filePath.toStdString(), 2048);
+    bool success = m_model->loadModel(filePath.toStdString(), 2048, 0);
     if (!success) {
         qWarning() << "WARNING: Could not load sbert";
         delete m_model;
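
For context, the new third argument to loadModel() is the n_gpu_layers value this commit exposes; the embedding model passes 0 so that sbert runs entirely on the CPU. A minimal sketch of a caller that offloads layers instead, assuming the "llmodel.h" header and a GPU-capable backend (the model path and layer count here are illustrative):

#include "llmodel.h"

#include <iostream>
#include <string>

int main() {
    const std::string path = "model.gguf"; // illustrative model path

    // Pick a backend implementation for this model file.
    LLModel *model = LLModel::Implementation::construct(path);

    // loadModel(path, n_ctx, ngl): request a 2048-token context and
    // offload up to 32 layers to the GPU; passing 0 keeps every layer
    // on the CPU, as the embedding model does above.
    if (!model->loadModel(path, 2048, 32)) {
        std::cerr << "failed to load " << path << '\n';
        delete model;
        return 1;
    }

    delete model;
    return 0;
}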