expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel
Date: 2024-01-31 14:17:44 -05:00
Committed by: GitHub
Parent: f549d5a70a
Commit: 061d1969f8
31 changed files with 381 additions and 157 deletions


@@ -195,7 +195,7 @@ public class LLModel implements AutoCloseable {
         if(model == null) {
             throw new IllegalStateException("Could not load, gpt4all backend returned error: " + error.getValue().getString(0));
         }
-        library.llmodel_loadModel(model, modelPathAbs, 2048);
+        library.llmodel_loadModel(model, modelPathAbs, 2048, 100);
         if(!library.llmodel_isModelLoaded(model)){
             throw new IllegalStateException("The model " + modelName + " could not be loaded");

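For readers of the diff, a hedged caller-side sketch of the new four-argument call. The names library, model, and modelPathAbs come from the hunk above; useGpu is an assumed flag, not part of this commit. A layer count of 0 keeps inference on the CPU, while a large value such as the hard-coded 100 asks the backend to offload as many layers as the model actually has.

    // Illustrative sketch only, not part of this commit.
    int nCtx = 2048;              // context length, as in the call above
    int ngl  = useGpu ? 100 : 0;  // `useGpu` is an assumed flag; 0 = CPU-only,
                                  // 100 = offload up to 100 layers (clamped by the model)
    library.llmodel_loadModel(model, modelPathAbs, nCtx, ngl);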

@@ -61,7 +61,7 @@ public interface LLModelLibrary {
     Pointer llmodel_model_create2(String model_path, String build_variant, PointerByReference error);
     void llmodel_model_destroy(Pointer model);
-    boolean llmodel_loadModel(Pointer model, String model_path, int n_ctx);
+    boolean llmodel_loadModel(Pointer model, String model_path, int n_ctx, int ngl);
     boolean llmodel_isModelLoaded(Pointer model);
     @u_int64_t long llmodel_get_state_size(Pointer model);
     @u_int64_t long llmodel_save_state_data(Pointer model, Pointer dest);
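Because the JNA interface method now takes a fourth argument, existing three-argument callers must be updated in the same commit. A minimal sketch of a compatibility helper, assuming a DEFAULT_NGL constant that this commit does not define, could look like this:

    import com.sun.jna.Pointer;

    final class LoadCompat {
        // Assumed default, not from this commit: offload as many layers as supported.
        private static final int DEFAULT_NGL = 100;

        // Preserves the old three-argument call shape for existing callers.
        static boolean loadModel(LLModelLibrary lib, Pointer model, String path, int nCtx) {
            return lib.llmodel_loadModel(model, path, nCtx, DEFAULT_NGL);
        }
    }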