expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel
Date: 2024-01-31 14:17:44 -05:00
Committed by: GitHub
parent f549d5a70a
commit 061d1969f8
31 changed files with 381 additions and 157 deletions
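The commit message also mentions limiting the GPU layers field to the maximum supported by the model. As a rough sketch of that kind of clamping (not code from this commit; the function and parameter names below are hypothetical), a UI-facing value can be capped against whatever layer count the backend reports for the loaded model:

#include <algorithm>

// Hypothetical sketch: clamp a requested GPU layer count to the maximum the
// loaded model reports. A negative value follows the common llama.cpp
// convention of "offload as many layers as possible".
int clampGpuLayers(int requestedLayers, int maxLayersForModel)
{
    if (requestedLayers < 0)
        return maxLayersForModel;                    // treat -1 as "all layers"
    return std::min(requestedLayers, maxLayersForModel);
}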


@@ -91,6 +91,7 @@ void MySettings::restoreModelDefaults(const ModelInfo &model)
     setModelMaxLength(model, model.m_maxLength);
     setModelPromptBatchSize(model, model.m_promptBatchSize);
     setModelContextLength(model, model.m_contextLength);
+    setModelGpuLayers(model, model.m_gpuLayers);
     setModelRepeatPenalty(model, model.m_repeatPenalty);
     setModelRepeatPenaltyTokens(model, model.m_repeatPenaltyTokens);
     setModelPromptTemplate(model, model.m_promptTemplate);
@@ -303,6 +304,28 @@ void MySettings::setModelContextLength(const ModelInfo &m, int l, bool force)
         emit contextLengthChanged(m);
 }

+int MySettings::modelGpuLayers(const ModelInfo &m) const
+{
+    QSettings setting;
+    setting.sync();
+    return setting.value(QString("model-%1").arg(m.id()) + "/gpuLayers", m.m_gpuLayers).toInt();
+}
+
+void MySettings::setModelGpuLayers(const ModelInfo &m, int l, bool force)
+{
+    if (modelGpuLayers(m) == l && !force)
+        return;
+
+    QSettings setting;
+    if (m.m_gpuLayers == l && !m.isClone)
+        setting.remove(QString("model-%1").arg(m.id()) + "/gpuLayers");
+    else
+        setting.setValue(QString("model-%1").arg(m.id()) + "/gpuLayers", l);
+    setting.sync();
+    if (!force)
+        emit gpuLayersChanged(m);
+}
+
 double MySettings::modelRepeatPenalty(const ModelInfo &m) const
 {
     QSettings setting;
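For context, the new getter/setter follows the same per-model QSettings pattern as the surrounding settings code: the value is stored under a "model-<id>/gpuLayers" key, and the key is removed again when the value matches the model's built-in default so that settings files stay minimal. A standalone sketch of that pattern (the organization, application, and model names here are illustrative, not from the gpt4all sources):

#include <QCoreApplication>
#include <QSettings>
#include <QString>

// Illustrative helpers mirroring the per-model key scheme "model-<id>/gpuLayers".
int readGpuLayers(const QString &modelId, int defaultLayers)
{
    QSettings settings;
    settings.sync();
    return settings.value(QString("model-%1/gpuLayers").arg(modelId), defaultLayers).toInt();
}

void writeGpuLayers(const QString &modelId, int layers, int defaultLayers)
{
    QSettings settings;
    if (layers == defaultLayers)
        settings.remove(QString("model-%1/gpuLayers").arg(modelId));   // fall back to the default
    else
        settings.setValue(QString("model-%1/gpuLayers").arg(modelId), layers);
    settings.sync();
}

int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);
    QCoreApplication::setOrganizationName("example-org");    // hypothetical names
    QCoreApplication::setApplicationName("example-app");

    writeGpuLayers("example-model", 32, /*defaultLayers=*/100);
    return readGpuLayers("example-model", 100) == 32 ? 0 : 1;
}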