Metal+LLama take two (#929)

Support latest llama with Metal
---------

Co-authored-by: Adam Treat <adam@nomic.ai>
Co-authored-by: niansa/tuxifan <tuxifan@posteo.de>
commit d3ba1295a7 (parent b162b5c64e)
Author: Aaron Miller
Date: 2023-06-09 13:48:46 -07:00
Committed by: GitHub
8 changed files with 141 additions and 66 deletions


@@ -121,20 +121,30 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant
     if (!has_at_least_minimal_hardware())
         return nullptr;
 
-    //TODO: Auto-detect CUDA/OpenCL
-    if (buildVariant == "auto") {
-        if (requires_avxonly()) {
-            buildVariant = "avxonly";
-        } else {
-            buildVariant = "default";
-        }
-    }
     // Read magic
     std::ifstream f(modelPath, std::ios::binary);
     if (!f) return nullptr;
     // Get correct implementation
-    auto impl = implementation(f, buildVariant);
-    if (!impl) return nullptr;
+    const LLModel::Implementation* impl = nullptr;
+
+#if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
+    if (buildVariant == "auto") {
+        impl = implementation(f, "metal");
+    }
+#endif
+
+    if (!impl) {
+        //TODO: Auto-detect CUDA/OpenCL
+        if (buildVariant == "auto") {
+            if (requires_avxonly()) {
+                buildVariant = "avxonly";
+            } else {
+                buildVariant = "default";
+            }
+        }
+        impl = implementation(f, buildVariant);
+        if (!impl) return nullptr;
+    }
     f.close();
     // Construct and return llmodel implementation
     return impl->construct();
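
The net effect of the hunk: when buildVariant is "auto" on an arm64 Apple machine, construct() now tries the "metal" implementation first, and only falls back to the CPU variants ("avxonly" or "default", chosen by requires_avxonly()) if no Metal implementation matches the model file. A minimal caller-side sketch follows, assuming the llmodel.h header from this repo; the model path and error message are placeholders, not part of the commit.

// Caller-side sketch (assumption: llmodel.h as in this repo; the
// model path below is a placeholder). Passing "auto" lets construct()
// prefer Metal on arm64 macOS and fall back to a CPU variant elsewhere.
#include <iostream>
#include <string>
#include "llmodel.h"

int main() {
    const std::string modelPath = "/path/to/model.bin"; // placeholder

    // construct() reads the file's magic to match an implementation;
    // it returns nullptr if the hardware or model is unsupported.
    LLModel *model = LLModel::construct(modelPath, "auto");
    if (!model) {
        std::cerr << "no implementation found for this model/hardware\n";
        return 1;
    }

    // Real use would call model->loadModel(modelPath) and then prompt it.
    delete model;
    return 0;
}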