fix AVX support by removing direct linking to AVX2 libs (#1750)

2025-09-04 18:11:02 +00:00 · 2023-12-13 12:11:09 -05:00
parent 0600f551b3
commit 3acbef14b7
6 changed files with 36 additions and 42 deletions
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -114,8 +114,6 @@ add_library(llmodel
    llmodel_c.h llmodel_c.cpp
    dlhandle.h
 )
-target_link_libraries(llmodel PRIVATE ggml-mainline-default)
-target_compile_definitions(llmodel PRIVATE GGML_BUILD_VARIANT="default")
 target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")

 set_target_properties(llmodel PROPERTIES
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -82,7 +82,7 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementat
    static auto* libs = new std::vector<Implementation>([] () {
        std::vector<Implementation> fres;

-        std::string impl_name_re = "(bert|llama|gptj|llamamodel-mainline)";
+        std::string impl_name_re = "(bert|gptj|llamamodel-mainline)";
        if (requires_avxonly()) {
            impl_name_re += "-avxonly";
        } else {
@@ -186,6 +186,27 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
    return fres;
 }

+LLModel *LLModel::Implementation::constructCpuLlama() { // Builds a model from a non-"metal" LLaMA entry of implementationList(); returns nullptr when no such entry exists.
+    const LLModel::Implementation *impl = nullptr;
+    for (const auto &i : implementationList()) {
+        if (i.m_buildVariant == "metal" || i.m_modelType != "LLaMA") continue; // skip Metal builds and every non-LLaMA model type
+        impl = &i; // no break here: the last matching entry in the list is the one used
+    }
+    if (!impl) {
+        std::cerr << "LLModel ERROR: Could not find CPU LLaMA implementation\n";
+        return nullptr;
+    }
+    auto fres = impl->m_construct(); // NOTE(review): dereferenced on the next line without a null check — confirm m_construct never returns nullptr
+    fres->m_implementation = impl; // remember which implementation produced this instance
+    return fres; // raw pointer, no owner is set up here — presumably the caller is responsible for its lifetime
+}
+
+std::vector<LLModel::GPUDevice> LLModel::Implementation::availableGPUDevices() { // Lists GPU devices by asking a CPU LLaMA model instance; empty vector when none can be constructed.
+    static LLModel *cpuLlama = LLModel::Implementation::constructCpuLlama(); // (memory leak) function-local static: initialized on the first call and never deleted; a nullptr result is cached too, so a failed lookup is not retried
+    if (cpuLlama) { return cpuLlama->availableGPUDevices(0); } // delegate to the model instance; NOTE(review): meaning of the 0 argument is not visible here — confirm
+    return {}; // no usable implementation was found: report no devices
+}
+
 void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) {
    s_implementations_search_path = path;
 }
--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/llmodel.h
@@ -15,6 +15,15 @@ class Dlhandle;
 class LLModel {
 public:
    using Token = int32_t;
+
+    struct GPUDevice { // Plain description of one GPU device; element type of the vector returned by Implementation::availableGPUDevices().
+        int index = 0; // presumably the backend's device index — TODO confirm against the implementation that fills it in
+        int type = 0; // backend-defined device type code; the meaning of the values is not visible here
+        size_t heapSize = 0; // NOTE(review): presumably device memory size in bytes — confirm units
+        std::string name; // device name as reported by the backend
+        std::string vendor; // vendor name as reported by the backend
+    };
+
    class Implementation {
    public:
        Implementation(Dlhandle&&);
@@ -29,14 +38,16 @@ public:
        static const std::vector<Implementation>& implementationList();
        static const Implementation *implementation(const char *fname, const std::string& buildVariant);
        static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
+        static std::vector<GPUDevice> availableGPUDevices();
        static void setImplementationsSearchPath(const std::string& path);
        static const std::string& implementationsSearchPath();

    private:
+        static LLModel *constructCpuLlama();
+
        bool (*m_magicMatch)(const char *fname);
        LLModel *(*m_construct)();

-    private:
        std::string_view m_modelType;
        std::string_view m_buildVariant;
        Dlhandle *m_dlhandle;
@@ -58,14 +69,6 @@ public:
        int32_t n_last_batch_tokens = 0;
    };

-    struct GPUDevice {
-        int index = 0;
-        int type = 0;
-        size_t heapSize = 0;
-        std::string name;
-        std::string vendor;
-    };
-
    explicit LLModel() {}
    virtual ~LLModel() {}

@@ -106,7 +109,6 @@ public:
    virtual bool initializeGPUDevice(int /*device*/) { return false; }
    virtual bool hasGPUDevice() { return false; }
    virtual bool usingGPUDevice() { return false; }
-    static std::vector<GPUDevice> availableGPUDevices();

 protected:
    // These are pure virtual because subclasses need to implement as the default implementation of
--- a/gpt4all-backend/llmodel_shared.cpp
+++ b/gpt4all-backend/llmodel_shared.cpp
@@ -4,10 +4,6 @@
 #include <iostream>
 #include <unordered_set>

-#ifdef GGML_USE_KOMPUTE
-#include "ggml-vulkan.h"
-#endif
-
 void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {
    size_t i = 0;
    promptCtx.n_past = 0;
@@ -177,26 +173,3 @@ std::vector<float> LLModel::embedding(const std::string &/*text*/)
    }
    return std::vector<float>();
 }
-
-std::vector<LLModel::GPUDevice> LLModel::availableGPUDevices()
-{
-#if defined(GGML_USE_KOMPUTE)
-    std::vector<ggml_vk_device> vkDevices = ggml_vk_available_devices(0);
-
-    std::vector<LLModel::GPUDevice> devices;
-    for(const auto& vkDevice : vkDevices) {
-        LLModel::GPUDevice device;
-        device.index = vkDevice.index;
-        device.type = vkDevice.type;
-        device.heapSize = vkDevice.heapSize;
-        device.name = vkDevice.name;
-        device.vendor = vkDevice.vendor;
-
-        devices.push_back(device);
-    }
-
-    return devices;
-#else
-    return std::vector<LLModel::GPUDevice>();
-#endif
-}