expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the respective maximums supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel
Date: 2024-01-31 14:17:44 -05:00
Committed by: GitHub
Parent: f549d5a70a
Commit: 061d1969f8
31 changed files with 381 additions and 157 deletions
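The core of the change, beyond exposing n_gpu_layers: the per-model maxima are now probed from the model file itself, so the settings fields can be clamped instead of accepting arbitrary values. A minimal sketch of that clamping idea, assuming C++17; the helper and its lower bound are illustrative, not part of this diff:

#include <algorithm>

// Hypothetical helper: keep a user-requested setting within the range the
// model actually supports (the lower bound of 0 is illustrative).
static int clampToModelMax(int requested, int modelMax)
{
    return std::clamp(requested, 0, modelMax);
}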


@@ -1,6 +1,7 @@
#include "modellist.h"
#include "mysettings.h"
#include "network.h"
#include "../gpt4all-backend/llmodel.h"
#include <QFile>
#include <QStandardPaths>
@@ -108,6 +109,41 @@ void ModelInfo::setContextLength(int l)
    m_contextLength = l;
}
int ModelInfo::maxContextLength() const
{
    if (m_maxContextLength != -1) return m_maxContextLength;
    auto path = (dirpath + filename()).toStdString();
    int n_ctx = LLModel::Implementation::maxContextLength(path);
    if (n_ctx < 0) {
        n_ctx = 4096; // fallback value
    }
    m_maxContextLength = n_ctx;
    return m_maxContextLength;
}
int ModelInfo::gpuLayers() const
{
    return MySettings::globalInstance()->modelGpuLayers(*this);
}
void ModelInfo::setGpuLayers(int l)
{
    if (isClone) MySettings::globalInstance()->setModelGpuLayers(*this, l, isClone /*force*/);
    m_gpuLayers = l;
}
int ModelInfo::maxGpuLayers() const
{
    if (m_maxGpuLayers != -1) return m_maxGpuLayers;
    auto path = (dirpath + filename()).toStdString();
    int layers = LLModel::Implementation::layerCount(path);
    if (layers < 0) {
        layers = 100; // fallback value
    }
    m_maxGpuLayers = layers;
    return m_maxGpuLayers;
}
double ModelInfo::repeatPenalty() const
{
    return MySettings::globalInstance()->modelRepeatPenalty(*this);
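Both getters above follow the same probe-once-and-cache pattern: ask the backend for the model file's limit, substitute a fallback (4096 for context length, 100 for layers) when the probe reports a negative value, and cache the result. A hedged sketch of calling the same backend probes directly; the path is hypothetical:

#include <string>
#include "../gpt4all-backend/llmodel.h"

// Ask the backend about a model file's limits up front.
std::string path = "/models/example-7b.Q4_0.gguf";             // hypothetical path
int maxCtx  = LLModel::Implementation::maxContextLength(path); // negative if unknown
int nLayers = LLModel::Implementation::layerCount(path);       // negative if unknown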
@@ -286,6 +322,7 @@ ModelList::ModelList()
    connect(MySettings::globalInstance(), &MySettings::maxLengthChanged, this, &ModelList::updateDataForSettings);
    connect(MySettings::globalInstance(), &MySettings::promptBatchSizeChanged, this, &ModelList::updateDataForSettings);
    connect(MySettings::globalInstance(), &MySettings::contextLengthChanged, this, &ModelList::updateDataForSettings);
    connect(MySettings::globalInstance(), &MySettings::gpuLayersChanged, this, &ModelList::updateDataForSettings);
    connect(MySettings::globalInstance(), &MySettings::repeatPenaltyChanged, this, &ModelList::updateDataForSettings);
    connect(MySettings::globalInstance(), &MySettings::repeatPenaltyTokensChanged, this, &ModelList::updateDataForSettings);
    connect(MySettings::globalInstance(), &MySettings::promptTemplateChanged, this, &ModelList::updateDataForSettings);
@@ -539,6 +576,8 @@ QVariant ModelList::dataInternal(const ModelInfo *info, int role) const
        return info->promptBatchSize();
    case ContextLengthRole:
        return info->contextLength();
    case GpuLayersRole:
        return info->gpuLayers();
    case RepeatPenaltyRole:
        return info->repeatPenalty();
    case RepeatPenaltyTokensRole:
@@ -664,6 +703,10 @@ void ModelList::updateData(const QString &id, int role, const QVariant &value)
        info->setMaxLength(value.toInt()); break;
    case PromptBatchSizeRole:
        info->setPromptBatchSize(value.toInt()); break;
    case ContextLengthRole:
        info->setContextLength(value.toInt()); break;
    case GpuLayersRole:
        info->setGpuLayers(value.toInt()); break;
    case RepeatPenaltyRole:
        info->setRepeatPenalty(value.toDouble()); break;
    case RepeatPenaltyTokensRole:
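Since GPU layers are exposed as an ordinary item role, callers adjust them through the same generic path as every other per-model field. A hypothetical caller; the id and value are made up:

// Push a new GPU-layer count for one model through the role-based API.
modelList->updateData(id, ModelList::GpuLayersRole, QVariant(32));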
@@ -755,6 +798,7 @@ QString ModelList::clone(const ModelInfo &model)
    updateData(id, ModelList::MaxLengthRole, model.maxLength());
    updateData(id, ModelList::PromptBatchSizeRole, model.promptBatchSize());
    updateData(id, ModelList::ContextLengthRole, model.contextLength());
    updateData(id, ModelList::GpuLayersRole, model.gpuLayers());
    updateData(id, ModelList::RepeatPenaltyRole, model.repeatPenalty());
    updateData(id, ModelList::RepeatPenaltyTokensRole, model.repeatPenaltyTokens());
    updateData(id, ModelList::PromptTemplateRole, model.promptTemplate());
@@ -1123,6 +1167,8 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
            updateData(id, ModelList::PromptBatchSizeRole, obj["promptBatchSize"].toInt());
        if (obj.contains("contextLength"))
            updateData(id, ModelList::ContextLengthRole, obj["contextLength"].toInt());
        if (obj.contains("gpuLayers"))
            updateData(id, ModelList::GpuLayersRole, obj["gpuLayers"].toInt());
        if (obj.contains("repeatPenalty"))
            updateData(id, ModelList::RepeatPenaltyRole, obj["repeatPenalty"].toDouble());
        if (obj.contains("repeatPenaltyTokens"))
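The parser only touches keys that are present, so models.json entries can opt in to the new field incrementally. An illustrative entry (field values made up, not taken from the official models.json):

{
    "name": "Example Model",
    "filename": "example-7b.Q4_0.gguf",
    "contextLength": 4096,
    "gpuLayers": 32,
    "repeatPenalty": 1.18
}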
@@ -1217,6 +1263,8 @@ void ModelList::updateModelsFromSettings()
        const int promptBatchSize = settings.value(g + "/promptBatchSize").toInt();
        Q_ASSERT(settings.contains(g + "/contextLength"));
        const int contextLength = settings.value(g + "/contextLength").toInt();
        Q_ASSERT(settings.contains(g + "/gpuLayers"));
        const int gpuLayers = settings.value(g + "/gpuLayers").toInt();
        Q_ASSERT(settings.contains(g + "/repeatPenalty"));
        const double repeatPenalty = settings.value(g + "/repeatPenalty").toDouble();
        Q_ASSERT(settings.contains(g + "/repeatPenaltyTokens"));
@@ -1236,6 +1284,7 @@ void ModelList::updateModelsFromSettings()
        updateData(id, ModelList::MaxLengthRole, maxLength);
        updateData(id, ModelList::PromptBatchSizeRole, promptBatchSize);
        updateData(id, ModelList::ContextLengthRole, contextLength);
        updateData(id, ModelList::GpuLayersRole, gpuLayers);
        updateData(id, ModelList::RepeatPenaltyRole, repeatPenalty);
        updateData(id, ModelList::RepeatPenaltyTokensRole, repeatPenaltyTokens);
        updateData(id, ModelList::PromptTemplateRole, promptTemplate);
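On the persistence side, each model's overrides live under a per-model settings group, and the Q_ASSERTs above expect every key to have been written. A hedged sketch of the writing half with QSettings; the group name and values are illustrative:

#include <QSettings>

QSettings settings;
settings.beginGroup("model-example-clone"); // hypothetical group name
settings.setValue("contextLength", 2048);
settings.setValue("gpuLayers", 32);
settings.endGroup();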