expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel
Date: 2024-01-31 14:17:44 -05:00
Committed by: GitHub
parent f549d5a70a
commit 061d1969f8
31 changed files with 381 additions and 157 deletions
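The commit message also mentions limiting the GPU layers field to the maximum supported by the model. As a rough sketch of that kind of clamping (not code from this commit; the function and parameter names below are hypothetical), a UI-facing value can be capped against whatever layer count the backend reports for the loaded model:

#include <algorithm>

// Hypothetical sketch: clamp a requested GPU layer count to the maximum the
// loaded model reports. A negative value follows the common llama.cpp
// convention of "offload as many layers as possible".
int clampGpuLayers(int requestedLayers, int maxLayersForModel)
{
    if (requestedLayers < 0)
        return maxLayersForModel;                    // treat -1 as "all layers"
    return std::min(requestedLayers, maxLayersForModel);
}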


@@ -91,6 +91,7 @@ void MySettings::restoreModelDefaults(const ModelInfo &model)
     setModelMaxLength(model, model.m_maxLength);
     setModelPromptBatchSize(model, model.m_promptBatchSize);
     setModelContextLength(model, model.m_contextLength);
+    setModelGpuLayers(model, model.m_gpuLayers);
     setModelRepeatPenalty(model, model.m_repeatPenalty);
     setModelRepeatPenaltyTokens(model, model.m_repeatPenaltyTokens);
     setModelPromptTemplate(model, model.m_promptTemplate);
@@ -303,6 +304,28 @@ void MySettings::setModelContextLength(const ModelInfo &m, int l, bool force)
         emit contextLengthChanged(m);
 }

+int MySettings::modelGpuLayers(const ModelInfo &m) const
+{
+    QSettings setting;
+    setting.sync();
+    return setting.value(QString("model-%1").arg(m.id()) + "/gpuLayers", m.m_gpuLayers).toInt();
+}
+
+void MySettings::setModelGpuLayers(const ModelInfo &m, int l, bool force)
+{
+    if (modelGpuLayers(m) == l && !force)
+        return;
+
+    QSettings setting;
+    if (m.m_gpuLayers == l && !m.isClone)
+        setting.remove(QString("model-%1").arg(m.id()) + "/gpuLayers");
+    else
+        setting.setValue(QString("model-%1").arg(m.id()) + "/gpuLayers", l);
+    setting.sync();
+    if (!force)
+        emit gpuLayersChanged(m);
+}
+
 double MySettings::modelRepeatPenalty(const ModelInfo &m) const
 {
     QSettings setting;
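For context, the new getter/setter follows the same per-model QSettings pattern as the surrounding settings code: the value is stored under a "model-<id>/gpuLayers" key, and the key is removed again when the value matches the model's built-in default so that settings files stay minimal. A standalone sketch of that pattern (the organization, application, and model names here are illustrative, not from the gpt4all sources):

#include <QCoreApplication>
#include <QSettings>
#include <QString>

// Illustrative helpers mirroring the per-model key scheme "model-<id>/gpuLayers".
int readGpuLayers(const QString &modelId, int defaultLayers)
{
    QSettings settings;
    settings.sync();
    return settings.value(QString("model-%1/gpuLayers").arg(modelId), defaultLayers).toInt();
}

void writeGpuLayers(const QString &modelId, int layers, int defaultLayers)
{
    QSettings settings;
    if (layers == defaultLayers)
        settings.remove(QString("model-%1/gpuLayers").arg(modelId));   // fall back to the default
    else
        settings.setValue(QString("model-%1/gpuLayers").arg(modelId), layers);
    settings.sync();
}

int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);
    QCoreApplication::setOrganizationName("example-org");    // hypothetical names
    QCoreApplication::setApplicationName("example-app");

    writeGpuLayers("example-model", 32, /*defaultLayers=*/100);
    return readGpuLayers("example-model", 100) == 32 ? 0 : 1;
}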