expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Jared Van Bortel authored 2024-01-31 14:17:44 -05:00, committed by GitHub
parent f549d5a70a    commit 061d1969f8
31 changed files with 381 additions and 157 deletions
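As context for the change, the sketch below shows how an n_gpu_layers value is typically forwarded to llama.cpp when a model is loaded. It is not part of this commit: the helper name load_with_ngl is hypothetical, and only llama_model_default_params, the n_gpu_layers field, and llama_load_model_from_file are taken from the llama.cpp API.

// Hypothetical helper (not part of this diff): forwarding an "ngl" value to llama.cpp.
#include "llama.h"

static llama_model *load_with_ngl(const char *model_path, int ngl) {
    llama_model_params params = llama_model_default_params();
    params.n_gpu_layers = ngl;  // layers to offload to the GPU; ignored by CPU-only builds
    return llama_load_model_from_file(model_path, params);
}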


@@ -672,8 +672,9 @@ GPTJ::GPTJ()
     d_ptr->modelLoaded = false;
 }
 
-size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx) {
+size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx, int ngl) {
     (void)n_ctx;
+    (void)ngl;
     gptj_model dummy_model;
     gpt_vocab dummy_vocab;
     size_t mem_req;
@@ -681,8 +682,9 @@ size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx) {
     return mem_req;
 }
 
-bool GPTJ::loadModel(const std::string &modelPath, int n_ctx) {
+bool GPTJ::loadModel(const std::string &modelPath, int n_ctx, int ngl) {
     (void)n_ctx;
+    (void)ngl;
     std::mt19937 rng(time(NULL));
     d_ptr->rng = rng;
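A minimal caller-side sketch, assuming only the three-argument signatures introduced above; the surrounding function, model reference, path, and chosen values are hypothetical. GPTJ casts ngl to void, so the value only matters for backends with GPU offload support.

// Hypothetical usage of the updated GPTJ interface shown in this diff.
#include <string>

void load_example(GPTJ &model, const std::string &path) {
    const int n_ctx = 2048;  // requested context length
    const int ngl   = 32;    // requested GPU layers; accepted but ignored by the CPU-only GPTJ backend
    size_t mem = model.requiredMem(path, n_ctx, ngl);  // estimate memory before loading
    (void)mem;
    bool ok = model.loadModel(path, n_ctx, ngl);
    (void)ok;
}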