expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
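
For illustration, a minimal sketch of the clamping behavior described above, using assumed constants kMaxContextLength and kMaxGpuLayers in place of the application's real model-metadata lookups (names and values here are hypothetical, not the project's API):

    #include <algorithm>
    #include <cstdio>

    // Hypothetical limits; in the application these are derived from the
    // loaded model's metadata, not hard-coded.
    constexpr int kMaxContextLength = 4096; // model's trained context window
    constexpr int kMaxGpuLayers     = 33;   // repeating layers plus the output layer

    int main() {
        int requestedCtx = 8192; // user-entered value above the supported maximum
        int requestedNgl = 100;  // "offload everything"-style request

        // Clamp the settings fields so they can never exceed the model's maxima.
        int nCtx = std::clamp(requestedCtx, 8, kMaxContextLength);
        int ngl  = std::clamp(requestedNgl, 0, kMaxGpuLayers);

        std::printf("n_ctx=%d ngl=%d\n", nCtx, ngl);
    }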
Author: Jared Van Bortel
Date: 2024-01-31 14:17:44 -05:00
Committed by: GitHub
Parent: f549d5a70a
Commit: 061d1969f8
31 changed files with 381 additions and 157 deletions


@@ -23,7 +23,7 @@ void* load_model(const char *fname, int n_threads) {
         fprintf(stderr, "%s: error '%s'\n", __func__, new_error);
         return nullptr;
     }
-    if (!llmodel_loadModel(model, fname, 2048)) {
+    if (!llmodel_loadModel(model, fname, 2048, 100)) {
         llmodel_model_destroy(model);
         return nullptr;
     }
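
To show the new parameter in context, a hedged sketch of the updated call sequence: the four-argument llmodel_loadModel call is taken from the hunk above, while the llmodel_model_create2 creation call, the "auto" build variant, and the include path are assumptions about the surrounding binding code:

    #include <cstdio>
    #include "llmodel_c.h" // gpt4all C API header; include path may differ

    void *load_with_gpu(const char *fname) {
        const char *new_error = nullptr;
        llmodel_model model = llmodel_model_create2(fname, "auto", &new_error);
        if (model == nullptr) {
            std::fprintf(stderr, "%s: error '%s'\n", __func__, new_error);
            return nullptr;
        }
        // 2048-token context; ngl=100 asks llama.cpp to offload up to 100
        // layers to the GPU, which covers every layer of most models.
        if (!llmodel_loadModel(model, fname, 2048, 100)) {
            llmodel_model_destroy(model);
            return nullptr;
        }
        return model;
    }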