Mirror of https://github.com/nomic-ai/gpt4all.git
Fix VRAM leak when model loading fails (#1901)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Submodule gpt4all-backend/llama.cpp-mainline updated: 47aec1bcc0...cd1b5a104b
gpt4all-backend/llamamodel.cpp
@@ -150,7 +150,15 @@ size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl)
 bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
 {
-    gpt_params params;
+    // clean up after previous loadModel()
+    if (d_ptr->model) {
+        llama_free_model(d_ptr->model);
+        d_ptr->model = nullptr;
+    }
+    if (d_ptr->ctx) {
+        llama_free(d_ptr->ctx);
+        d_ptr->ctx = nullptr;
+    }
 
     if (n_ctx < 8) {
         std::cerr << "warning: minimum context size is 8, using minimum size.\n";
         n_ctx = 8;
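
The hunk above is the first half of the fix: previously, a repeated call to loadModel() overwrote d_ptr->model and d_ptr->ctx without freeing them, so the VRAM held by the old model stayed allocated. A minimal sketch of the same cleanup expressed as RAII wrappers, assuming only llama_free_model() and llama_free() from llama.h; the guard types and names are hypothetical, not part of this commit:

    // Hypothetical sketch, not from this commit: std::unique_ptr with custom
    // deleters makes the "free before overwrite" step automatic.
    #include <llama.h>
    #include <memory>

    struct ModelDeleter { void operator()(llama_model *m) const { llama_free_model(m); } };
    struct CtxDeleter   { void operator()(llama_context *c) const { llama_free(c); } };

    using ModelPtr = std::unique_ptr<llama_model, ModelDeleter>;
    using CtxPtr   = std::unique_ptr<llama_context, CtxDeleter>;

    // Resetting or reassigning these frees the previous object first, which is
    // what the explicit "clean up after previous loadModel()" block does by hand:
    //   ctx.reset();
    //   model.reset(llama_load_model_from_file(modelPath.c_str(), mparams));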
@@ -159,6 +167,8 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
 
     // -- load the model --
 
+    gpt_params params;
+
     d_ptr->model_params = llama_model_default_params();
 
     d_ptr->model_params.use_mmap = params.use_mmap;
@@ -215,8 +225,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
 
     d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params);
     if (!d_ptr->ctx) {
-        d_ptr->device = -1;
         std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl;
+        llama_free_model(d_ptr->model);
+        d_ptr->model = nullptr;
+        d_ptr->device = -1;
         return false;
     }
 
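
The last hunk is the error path the commit title refers to: when llama_new_context_with_model() fails, the model that was just loaded (with up to ngl layers offloaded to the GPU) is now freed instead of staying resident in VRAM. A standalone sketch of that pattern, assuming the llama.h API of the pinned llama.cpp submodule; the helper name is illustrative:

    // Illustrative helper, not from the commit: load a model and context,
    // releasing the model if context creation fails so no VRAM is leaked.
    #include <llama.h>
    #include <iostream>

    static llama_context *load_or_cleanup(const char *path) {
        llama_model *model = llama_load_model_from_file(path, llama_model_default_params());
        if (!model)
            return nullptr;  // nothing was allocated, nothing to free

        llama_context *ctx = llama_new_context_with_model(model, llama_context_default_params());
        if (!ctx) {
            std::cerr << "failed to init context for " << path << '\n';
            llama_free_model(model);  // the step this commit adds; without it
                                      // the offloaded layers stay in VRAM
            return nullptr;
        }
        // On success the model can later be recovered via llama_get_model(ctx)
        // and freed after llama_free(ctx).
        return ctx;
    }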