expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Authored by Jared Van Bortel on 2024-01-31 14:17:44 -05:00; committed by GitHub
commit 061d1969f8 (parent f549d5a70a)
31 changed files with 381 additions and 157 deletions

@@ -63,6 +63,8 @@ public:
     Q_INVOKABLE void setModelSystemPrompt(const ModelInfo &m, const QString &p, bool force = false);
     int modelContextLength(const ModelInfo &m) const;
     Q_INVOKABLE void setModelContextLength(const ModelInfo &m, int s, bool force = false);
+    int modelGpuLayers(const ModelInfo &m) const;
+    Q_INVOKABLE void setModelGpuLayers(const ModelInfo &m, int s, bool force = false);
 
     // Application settings
     int threadCount() const;
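
The commit message says the GPU-layers field is dynamically limited to the maximum the model supports, which is presumably enforced through the setter pair declared above. Below is a minimal, self-contained sketch of that clamping idea; the Model struct, layerCount field, and clampGpuLayers helper are illustrative names, not the actual gpt4all API.

// Illustrative sketch only: clamp a user-requested n_gpu_layers value to
// the range the model actually supports. The real logic lives in
// MySettings/ModelInfo and may differ in its bounds and storage.
#include <algorithm>
#include <cstdio>

struct Model {
    int layerCount; // total transformer layers the model file declares
};

static int clampGpuLayers(const Model &m, int requested)
{
    // Keep the request within [0, layerCount]; the exact lower bound
    // used by the application is an assumption here.
    return std::clamp(requested, 0, m.layerCount);
}

int main()
{
    Model m{33};
    std::printf("%d\n", clampGpuLayers(m, 100)); // prints 33
    std::printf("%d\n", clampGpuLayers(m, 10));  // prints 10
    return 0;
}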
@@ -85,6 +87,8 @@ public:
     void setDevice(const QString &u);
     int32_t contextLength() const;
     void setContextLength(int32_t value);
+    int32_t gpuLayers() const;
+    void setGpuLayers(int32_t value);
 
     // Release/Download settings
     QString lastVersionStarted() const;
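
Per the commit subject, this application-wide gpuLayers() value is what ultimately reaches llama.cpp. As a hedged sketch of that hand-off: only llama_model_default_params(), the n_gpu_layers field, and llama_load_model_from_file() below are real llama.cpp API (as of early 2024); the wrapper function is an assumption for illustration, not gpt4all's actual loader.

// Sketch of passing the setting through to llama.cpp's model parameters.
#include "llama.h"

llama_model *loadWithOffload(const char *path, int32_t nGpuLayers)
{
    llama_model_params params = llama_model_default_params();
    params.n_gpu_layers = nGpuLayers; // number of layers to offload to the GPU
    return llama_load_model_from_file(path, params);
}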
@@ -121,6 +125,7 @@ Q_SIGNALS:
     void maxLengthChanged(const ModelInfo &model);
     void promptBatchSizeChanged(const ModelInfo &model);
     void contextLengthChanged(const ModelInfo &model);
+    void gpuLayersChanged(const ModelInfo &model);
     void repeatPenaltyChanged(const ModelInfo &model);
     void repeatPenaltyTokensChanged(const ModelInfo &model);
     void promptTemplateChanged(const ModelInfo &model);
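
The new gpuLayersChanged signal follows the same notify pattern as the neighboring signals: the setter emits it so that bound UI elements refresh. The sketch below shows that pattern with a stand-in class, not the actual MySettings implementation; it also simplifies the signal to be parameterless, whereas the real one carries the affected ModelInfo.

// Illustration of the change-notification pattern the new signal follows.
#include <QObject>

class SettingsLike : public QObject
{
    Q_OBJECT
public:
    int gpuLayers() const { return m_gpuLayers; }
    void setGpuLayers(int value)
    {
        if (m_gpuLayers == value)
            return;              // skip redundant notifications
        m_gpuLayers = value;
        emit gpuLayersChanged(); // bound UI fields re-evaluate on this
    }

signals:
    void gpuLayersChanged();

private:
    int m_gpuLayers = 32; // illustrative default, not gpt4all's
};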