mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-09-10 04:49:07 +00:00)
@@ -115,12 +115,6 @@ bool LLamaModel::loadModel(const std::string &modelPath)
 #if LLAMA_DATE <= 230511
     d_ptr->params.n_parts = params.n_parts;
 #endif
-#ifdef GGML_USE_METAL
-    std::cerr << "llama.cpp: using Metal" << std::endl;
-    // metal always runs the whole model if n_gpu_layers is not 0, at least
-    // currently
-    d_ptr->params.n_gpu_layers = 1;
-#endif
 
     d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params);
     if (!d_ptr->ctx) {
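For readers unfamiliar with the block being dropped here: in the 2023-era llama.cpp C API, any non-zero n_gpu_layers makes the Metal backend evaluate the whole model, which is why the old code pinned it to 1 before calling llama_init_from_file. Below is a minimal standalone sketch of that loading pattern, assuming a llama.cpp snapshot from that period (llama_init_from_file was later superseded by llama_load_model_from_file plus llama_new_context_with_model); the model path is a placeholder.

    #include <llama.h>
    #include <iostream>

    int main(int argc, char **argv) {
        if (argc < 2) {
            std::cerr << "usage: " << argv[0] << " <model.bin>" << std::endl;
            return 1;
        }

        llama_context_params params = llama_context_default_params();
    #ifdef GGML_USE_METAL
        // Any non-zero value means Metal evaluates the entire model.
        params.n_gpu_layers = 1;
    #endif

        // Older single-call API: loads the weights and creates the context.
        llama_context *ctx = llama_init_from_file(argv[1], params);
        if (!ctx) {
            std::cerr << "failed to load " << argv[1] << std::endl;
            return 1;
        }

        llama_free(ctx);
        return 0;
    }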
@@ -234,30 +228,7 @@ DLL_EXPORT bool magic_match(std::istream& f) {
     // Check version
     uint32_t version = 0;
     f.read(reinterpret_cast<char*>(&version), sizeof(version));
-    if (!(version LLAMA_VERSIONS)) {
-        return false;
-    }
-#ifdef GGML_USE_METAL
-    // Check quant supported on metal
-    // skip fields
-    off_t offset = sizeof(uint32_t) * 6; // n_vocab, n_embd, n_mult, n_head, n_layer, n_rot
-    f.seekg(offset, std::ios_base::cur);
-    uint32_t ftype;
-    f.read(reinterpret_cast<char*>(&ftype), sizeof(ftype)); // ftype
-    switch((enum llama_ftype) ftype) {
-        // currently supported on Metal https://github.com/ggerganov/llama.cpp/blob/ae9663f1887513e152839e91f61c513075a19422/ggml-metal.m#L51-L55
-        case LLAMA_FTYPE_MOSTLY_F16:
-        case LLAMA_FTYPE_MOSTLY_Q2_K:
-        case LLAMA_FTYPE_MOSTLY_Q4_0:
-        case LLAMA_FTYPE_MOSTLY_Q6_K:
-        case LLAMA_FTYPE_MOSTLY_Q4_K_S:
-        case LLAMA_FTYPE_MOSTLY_Q4_K_M:
-            return true;
-        default: // unsupported quant-type for Metal
-            return false;
-    }
-#endif
-    return true;
+    return version LLAMA_VERSIONS;
 }
 
 DLL_EXPORT LLModel *construct() {
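The bare "version LLAMA_VERSIONS" above is not a typo: LLAMA_VERSIONS is a preprocessor macro defined earlier in llamamodel.cpp (selected by LLAMA_DATE, not shown in this hunk) that expands to the right-hand side of a comparison. A minimal sketch of the idiom, using an assumed placeholder definition rather than the real one:

    #include <cstdint>
    #include <cstdio>

    // Placeholder for illustration only; the actual expansion in llamamodel.cpp
    // depends on which llama.cpp snapshot (LLAMA_DATE) the backend is built against.
    #define LLAMA_VERSIONS >= 2

    static bool version_supported(uint32_t version) {
        // After preprocessing this reads: return version >= 2;
        return version LLAMA_VERSIONS;
    }

    int main() {
        std::printf("v1 supported: %d, v3 supported: %d\n",
                    version_supported(1), version_supported(3));
        return 0;
    }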
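The GGML_USE_METAL branch deleted in the second hunk located the quantization type by walking the model header: a magic word, the format version, six uint32 hyperparameters (n_vocab, n_embd, n_mult, n_head, n_layer, n_rot), then ftype. A small standalone sketch of that walk, assuming a GGJT-style single-file model:

    #include <cstdint>
    #include <fstream>
    #include <iostream>

    int main(int argc, char **argv) {
        if (argc < 2) {
            std::cerr << "usage: " << argv[0] << " <model.bin>" << std::endl;
            return 1;
        }
        std::ifstream f(argv[1], std::ios::binary);

        uint32_t magic = 0, version = 0, ftype = 0;
        f.read(reinterpret_cast<char*>(&magic), sizeof(magic));
        f.read(reinterpret_cast<char*>(&version), sizeof(version));

        // Skip n_vocab, n_embd, n_mult, n_head, n_layer, n_rot.
        f.seekg(sizeof(uint32_t) * 6, std::ios_base::cur);
        f.read(reinterpret_cast<char*>(&ftype), sizeof(ftype));

        if (!f) {
            std::cerr << "header too short or unreadable" << std::endl;
            return 1;
        }
        std::cout << std::hex << "magic=0x" << magic << std::dec
                  << " version=" << version << " ftype=" << ftype << std::endl;
        return 0;
    }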