From db34a2f67015cac80fb8e62daa81cd3e90f0f267 Mon Sep 17 00:00:00 2001
From: Aaron Miller
Date: Mon, 26 Jun 2023 14:53:17 -0700
Subject: [PATCH] llmodel: skip attempting Metal if model+kvcache > 53% of
 system ram

---
 gpt4all-backend/llamamodel.cpp |  4 +++-
 gpt4all-backend/llmodel.cpp    | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 93a899b5..71d47ef5 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -178,7 +178,9 @@ int32_t LLamaModel::threadCount() const {
 
 LLamaModel::~LLamaModel()
 {
-    llama_free(d_ptr->ctx);
+    if(d_ptr->ctx) {
+        llama_free(d_ptr->ctx);
+    }
 }
 
 bool LLamaModel::isModelLoaded() const
diff --git a/gpt4all-backend/llmodel.cpp b/gpt4all-backend/llmodel.cpp
index 3563f2c5..d7c721e2 100644
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -1,5 +1,6 @@
 #include "llmodel.h"
 #include "dlhandle.h"
+#include "sysinfo.h"
 
 #include <...>
 #include <...>
@@ -129,7 +130,20 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant
 
 #if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
     if (buildVariant == "auto") {
+        size_t total_mem = getSystemTotalRAMInBytes();
         impl = implementation(f, "metal");
+        if(impl) {
+            LLModel* metalimpl = impl->construct();
+            size_t req_mem = metalimpl->requiredMem(modelPath);
+            float req_to_total = (float) req_mem / (float) total_mem;
+            // on a 16GB M2 Mac a 13B q4_0 (0.52) works for me but a 13B q4_K_M (0.55) does not
+            if (req_to_total >= 0.53) {
+                delete metalimpl;
+                impl = nullptr;
+            } else {
+                return metalimpl;
+            }
+        }
     }
 #endif
 
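
For reference, the gating logic the patch adds reduces to the standalone
check below. This is a minimal sketch, not part of the patch itself:
shouldTryMetal is a hypothetical helper name, the two size arguments stand
in for requiredMem() and getSystemTotalRAMInBytes(), and the 0.53 cutoff is
the empirical threshold from the in-diff comment about the 16GB M2 Mac.

    #include <cstddef>

    // Return true if the model plus its KV cache is expected to fit within
    // the empirically safe fraction (53%) of total system RAM, i.e. it is
    // worth attempting the Metal backend at all.
    static bool shouldTryMetal(std::size_t requiredMemBytes, std::size_t totalRamBytes)
    {
        if (totalRamBytes == 0)
            return false; // unknown RAM size: don't risk Metal
        float reqToTotal = (float) requiredMemBytes / (float) totalRamBytes;
        return reqToTotal < 0.53f;
    }

When the check fails, the patch tears down the speculatively constructed
Metal implementation (delete metalimpl; impl = nullptr;) so construction
falls through to the non-Metal code path; the destructor null check in
llamamodel.cpp makes deleting that never-loaded instance safe.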