expose n_gpu_layers parameter of llama.cpp (#1890)

Also dynamically limit the GPU layers and context length fields to the maximum supported by the model.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-09-17 16:28:20 +00:00 · 2024-01-31 14:17:44 -05:00
parent f549d5a70a
commit 061d1969f8
31 changed files with 381 additions and 157 deletions
--- a/gpt4all-bindings/csharp/Gpt4All/Bindings/LLModel.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Bindings/LLModel.cs
@@ -183,7 +183,7 @@ public class LLModel : ILLModel
    /// <returns>true if the model was loaded successfully, false otherwise.</returns>
    public bool Load(string modelPath)
    {
-        return NativeMethods.llmodel_loadModel(_handle, modelPath, 2048);
+        return NativeMethods.llmodel_loadModel(_handle, modelPath, 2048, 100); // hard-coded: n_ctx = 2048, ngl = 100 (parameter names per the llmodel_loadModel declaration)
    }

    protected void Destroy()
--- a/gpt4all-bindings/csharp/Gpt4All/Bindings/NativeMethods.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Bindings/NativeMethods.cs
@@ -71,7 +71,8 @@ internal static unsafe partial class NativeMethods
    public static extern bool llmodel_loadModel(
        [NativeTypeName("llmodel_model")] IntPtr model,
        [NativeTypeName("const char *")][MarshalAs(UnmanagedType.LPUTF8Str)] string model_path,
-        [NativeTypeName("int32_t")] int n_ctx);
+        [NativeTypeName("int32_t")] int n_ctx,
+        [NativeTypeName("int32_t")] int ngl);

    [DllImport("libllmodel", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true)]

--- a/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs
+++ b/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs
@@ -43,7 +43,7 @@ public class Gpt4AllModelFactory : IGpt4AllModelFactory
        }
        _logger.LogDebug("Model created handle=0x{ModelHandle:X8}", handle);
        _logger.LogInformation("Model loading started");
-        var loadedSuccessfully = NativeMethods.llmodel_loadModel(handle, modelPath, 2048);
+        var loadedSuccessfully = NativeMethods.llmodel_loadModel(handle, modelPath, 2048, 100);
        _logger.LogInformation("Model loading completed success={ModelLoadSuccess}", loadedSuccessfully);
        if (!loadedSuccessfully)
        {