Fix VRAM leak when model loading fails (#1901)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-09-05 10:30:29 +00:00 · 2024-02-01 15:45:45 -05:00
parent e1eac00ee0
commit 10e3f7bbf5
3 changed files with 16 additions and 3 deletions
--- a/gpt4all-backend/llama.cpp-mainline
+++ b/gpt4all-backend/llama.cpp-mainline
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -150,7 +150,15 @@ size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl)

 bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
 {
-    gpt_params params;
+    // clean up after previous loadModel()
+    if (d_ptr->model) {
+        llama_free_model(d_ptr->model);
+        d_ptr->model = nullptr;
+    }
+    if (d_ptr->ctx) {
+        llama_free(d_ptr->ctx);
+        d_ptr->ctx = nullptr;
+    }

    if (n_ctx < 8) {
        std::cerr << "warning: minimum context size is 8, using minimum size.\n";
@@ -159,6 +167,8 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)

    // -- load the model --

+    gpt_params params;
+
    d_ptr->model_params = llama_model_default_params();

    d_ptr->model_params.use_mmap  = params.use_mmap;
@@ -215,8 +225,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)

    d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params);
    if (!d_ptr->ctx) {
-        d_ptr->device = -1;
        std::cerr << "LLAMA ERROR: failed to init context for model " <<  modelPath << std::endl;
+        llama_free_model(d_ptr->model);
+        d_ptr->model = nullptr;
+        d_ptr->device = -1;
        return false;
    }