mirror of https://github.com/nomic-ai/gpt4all.git
llamamodel: add 12 new architectures for CPU inference (#1914)
Baichuan, BLOOM, CodeShell, GPT-2, Orion, Persimmon, Phi and Phi-2, Plamo, Qwen, Qwen2, Refact, StableLM

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
parent 4461af35c7
commit 92c025a7f6
@@ -508,7 +508,14 @@ DLL_EXPORT bool magic_match(const char *fname) {
     auto * ctx = load_gguf(fname, arch);
 
     bool valid = true;
-    if (!(arch == "llama" || arch == "starcoder" || arch == "falcon" || arch == "mpt")) {
+
+    static const std::vector<const char *> known_arches {
+        "baichuan", "bloom", "codeshell", "falcon", "gpt2", "llama", "mpt", "orion", "persimmon", "phi2", "plamo",
+        "qwen", "qwen2", "refact", "stablelm", "starcoder"
+    };
+
+    if (std::find(known_arches.begin(), known_arches.end(), arch) == known_arches.end()) {
         // not supported by this version of llama.cpp
         if (!(arch == "gptj" || arch == "bert")) { // we support these via other modules
             std::cerr << __func__ << ": unsupported model architecture: " << arch << "\n";
         }
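For reference, the new check reduces to a whitelist lookup with std::find. The sketch below is a minimal, self-contained illustration of that pattern, not the actual gpt4all code: the helper name is_supported_arch is invented here, and the real check lives inline in magic_match(), where arch is populated by load_gguf() rather than passed as a parameter.

// Standalone sketch of the whitelist check added above. The helper name
// is_supported_arch is hypothetical; in gpt4all the check is inline in
// magic_match(), and arch comes from load_gguf() rather than a parameter.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

static bool is_supported_arch(const std::string &arch) {
    // Architectures this version of llama.cpp can load for CPU inference.
    static const std::vector<const char *> known_arches {
        "baichuan", "bloom", "codeshell", "falcon", "gpt2", "llama", "mpt",
        "orion", "persimmon", "phi2", "plamo", "qwen", "qwen2", "refact",
        "stablelm", "starcoder"
    };

    if (std::find(known_arches.begin(), known_arches.end(), arch) == known_arches.end()) {
        // gptj and bert are supported via other gpt4all modules, so the
        // unsupported-architecture error is only printed for everything else.
        if (!(arch == "gptj" || arch == "bert")) {
            std::cerr << __func__ << ": unsupported model architecture: " << arch << "\n";
        }
        return false;
    }
    return true;
}

int main() {
    std::cout << is_supported_arch("qwen2") << "\n"; // 1: in the whitelist
    std::cout << is_supported_arch("gptj")  << "\n"; // 0, but silent: handled by another module
    std::cout << is_supported_arch("rwkv")  << "\n"; // 0: prints the unsupported-architecture message
}

One design note visible in the diff: replacing the chained string comparisons with a static vector means adding a new architecture is a one-token edit to the list rather than another clause in a growing boolean expression.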