From db34a2f67015cac80fb8e62daa81cd3e90f0f267 Mon Sep 17 00:00:00 2001
From: Aaron Miller
Date: Mon, 26 Jun 2023 14:53:17 -0700
Subject: [PATCH] llmodel: skip attempting Metal if model+kvcache > 53% of
 system ram

---
 gpt4all-backend/llamamodel.cpp |  4 +++-
 gpt4all-backend/llmodel.cpp    | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 93a899b5..71d47ef5 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -178,7 +178,9 @@ int32_t LLamaModel::threadCount() const {
 
 LLamaModel::~LLamaModel()
 {
-    llama_free(d_ptr->ctx);
+    if(d_ptr->ctx) {
+        llama_free(d_ptr->ctx);
+    }
 }
 
 bool LLamaModel::isModelLoaded() const
diff --git a/gpt4all-backend/llmodel.cpp b/gpt4all-backend/llmodel.cpp
index 3563f2c5..d7c721e2 100644
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -1,5 +1,6 @@
 #include "llmodel.h"
 #include "dlhandle.h"
+#include "sysinfo.h"
 
 #include <...>
 #include <...>
@@ -129,7 +130,20 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant
 
 #if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
     if (buildVariant == "auto") {
+        size_t total_mem = getSystemTotalRAMInBytes();
         impl = implementation(f, "metal");
+        if(impl) {
+            LLModel* metalimpl = impl->construct();
+            size_t req_mem = metalimpl->requiredMem(modelPath);
+            float req_to_total = (float) req_mem / (float) total_mem;
+            // on a 16GB M2 Mac a 13B q4_0 (0.52) works for me but a 13B q4_K_M (0.55) does not
+            if (req_to_total >= 0.53) {
+                delete metalimpl;
+                impl = nullptr;
+            } else {
+                return metalimpl;
+            }
+        }
     }
 #endif
 
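
For reference, the gating logic the patch adds reduces to the standalone
check below. This is a minimal sketch, not part of the patch itself:
shouldTryMetal is a hypothetical helper name, the two size arguments stand
in for requiredMem() and getSystemTotalRAMInBytes(), and the 0.53 cutoff is
the empirical threshold from the in-diff comment about the 16GB M2 Mac.

    #include <cstddef>

    // Return true if the model plus its KV cache is expected to fit within
    // the empirically safe fraction (53%) of total system RAM, i.e. it is
    // worth attempting the Metal backend at all.
    static bool shouldTryMetal(std::size_t requiredMemBytes, std::size_t totalRamBytes)
    {
        if (totalRamBytes == 0)
            return false; // unknown RAM size: don't risk Metal
        float reqToTotal = (float) requiredMemBytes / (float) totalRamBytes;
        return reqToTotal < 0.53f;
    }

When the check fails, the patch tears down the speculatively constructed
Metal implementation (delete metalimpl; impl = nullptr;) so construction
falls through to the non-Metal code path; the destructor null check in
llamamodel.cpp makes deleting that never-loaded instance safe.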