expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-10-24 09:29:37 +00:00 · 2024-01-31 14:17:44 -05:00
parent f549d5a70a
commit 061d1969f8
31 changed files with 381 additions and 157 deletions
--- a/gpt4all-backend/bert_impl.h
+++ b/gpt4all-backend/bert_impl.h
@@ -18,9 +18,9 @@ public:

    bool supportsEmbedding() const override { return true; }
    bool supportsCompletion() const override { return true; }
-    bool loadModel(const std::string &modelPath, int n_ctx) override;
+    bool loadModel(const std::string &modelPath, int n_ctx, int ngl) override;
    bool isModelLoaded() const override;
-    size_t requiredMem(const std::string &modelPath, int n_ctx) override;
+    size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override;
    size_t stateSize() const override;
    size_t saveState(uint8_t *dest) const override;
    size_t restoreState(const uint8_t *src) override;