fix AVX support by removing direct linking to AVX2 libs (#1750)

Jared Van Bortel 2023-12-13 12:11:09 -05:00, committed by GitHub
parent 0600f551b3, commit 3acbef14b7
6 changed files with 36 additions and 42 deletions

gpt4all-backend/CMakeLists.txt

@@ -114,8 +114,6 @@ add_library(llmodel
     llmodel_c.h llmodel_c.cpp
     dlhandle.h
 )
-target_link_libraries(llmodel PRIVATE ggml-mainline-default)
-target_compile_definitions(llmodel PRIVATE GGML_BUILD_VARIANT="default")
 target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")
 set_target_properties(llmodel PROPERTIES
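Why this is safe to remove: llmodel discovers its backend libraries at runtime through dlhandle, so only the variant matching the host CPU is ever mapped into the process, while a hard link to ggml-mainline-default forced AVX2 code in regardless. A minimal sketch of the pattern (plain dlopen with hypothetical library and symbol names, not the project's dlhandle API):

```cpp
// Sketch only: illustrates runtime backend selection, not gpt4all's exact code.
#include <dlfcn.h>
#include <iostream>
#include <string>

int main() {
    // llmodel.cpp decides this via requires_avxonly(); hardcoded here for illustration.
    bool avxonly = true; // pretend the host CPU lacks AVX2
    std::string lib = std::string("libllamamodel-mainline-") +
                      (avxonly ? "avxonly" : "default") + ".so";

    void *handle = dlopen(lib.c_str(), RTLD_NOW | RTLD_LOCAL);
    if (!handle) {
        std::cerr << "dlopen failed: " << dlerror() << '\n';
        return 1;
    }
    // Real implementations export entry points that llmodel resolves by name;
    // "construct" is a stand-in symbol here.
    using construct_t = void *(*)();
    auto construct = reinterpret_cast<construct_t>(dlsym(handle, "construct"));
    if (construct)
        std::cout << "resolved construct() from " << lib << '\n';
    dlclose(handle);
}
```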

gpt4all-backend/llmodel.cpp

@@ -82,7 +82,7 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementationList()
     static auto* libs = new std::vector<Implementation>([] () {
         std::vector<Implementation> fres;
-        std::string impl_name_re = "(bert|llama|gptj|llamamodel-mainline)";
+        std::string impl_name_re = "(bert|gptj|llamamodel-mainline)";
         if (requires_avxonly()) {
             impl_name_re += "-avxonly";
         } else {

@@ -186,6 +186,27 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant)
     return fres;
 }

+LLModel *LLModel::Implementation::constructCpuLlama() {
+    const LLModel::Implementation *impl = nullptr;
+    for (const auto &i : implementationList()) {
+        if (i.m_buildVariant == "metal" || i.m_modelType != "LLaMA") continue;
+        impl = &i;
+    }
+    if (!impl) {
+        std::cerr << "LLModel ERROR: Could not find CPU LLaMA implementation\n";
+        return nullptr;
+    }
+    auto fres = impl->m_construct();
+    fres->m_implementation = impl;
+    return fres;
+}
+
+std::vector<LLModel::GPUDevice> LLModel::Implementation::availableGPUDevices() {
+    static LLModel *cpuLlama = LLModel::Implementation::constructCpuLlama(); // (memory leak)
+    if (cpuLlama) { return cpuLlama->availableGPUDevices(0); }
+    return {};
+}
+
 void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) {
     s_implementations_search_path = path;
 }
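The new static goes through a lazily constructed CPU LLaMA instance; the function-local static means it is built once and deliberately never freed, as the (memory leak) comment flags. A hypothetical caller, assuming only the declarations this commit adds to llmodel.h:

```cpp
#include <iostream>
#include "llmodel.h"

int main() {
    // Enumerate GPUs via the runtime-loaded llama backend; empty if none found.
    for (const LLModel::GPUDevice &d : LLModel::Implementation::availableGPUDevices())
        std::cout << d.index << ": " << d.vendor << ' ' << d.name
                  << " (" << d.heapSize / (1024 * 1024) << " MiB)\n"; // heapSize assumed to be bytes
}
```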

gpt4all-backend/llmodel.h

@@ -15,6 +15,15 @@ class Dlhandle;
 class LLModel {
 public:
     using Token = int32_t;
+
+    struct GPUDevice {
+        int index = 0;
+        int type = 0;
+        size_t heapSize = 0;
+        std::string name;
+        std::string vendor;
+    };
+
     class Implementation {
     public:
         Implementation(Dlhandle&&);

@@ -29,14 +38,16 @@ public:
         static const std::vector<Implementation>& implementationList();
         static const Implementation *implementation(const char *fname, const std::string& buildVariant);
         static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
+        static std::vector<GPUDevice> availableGPUDevices();
         static void setImplementationsSearchPath(const std::string& path);
         static const std::string& implementationsSearchPath();

     private:
+        static LLModel *constructCpuLlama();
+
         bool (*m_magicMatch)(const char *fname);
         LLModel *(*m_construct)();
-
-    private:
         std::string_view m_modelType;
         std::string_view m_buildVariant;
         Dlhandle *m_dlhandle;

@@ -58,14 +69,6 @@ public:
         int32_t n_last_batch_tokens = 0;
     };

-    struct GPUDevice {
-        int index = 0;
-        int type = 0;
-        size_t heapSize = 0;
-        std::string name;
-        std::string vendor;
-    };
-
     explicit LLModel() {}
     virtual ~LLModel() {}

@@ -106,7 +109,6 @@ public:
     virtual bool initializeGPUDevice(int /*device*/) { return false; }
     virtual bool hasGPUDevice() { return false; }
     virtual bool usingGPUDevice() { return false; }
-    static std::vector<GPUDevice> availableGPUDevices();

 protected:
     // These are pure virtual because subclasses need to implement as the default implementation of
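Net effect on callers: GPUDevice moves ahead of the nested Implementation class so the new static can name it, and device enumeration is reached through Implementation instead of LLModel itself. The one-line migration (applied to mysettings.cpp at the end of this commit):

```cpp
// Before (removed by this commit):
// std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();

// After:
std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
```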

gpt4all-backend/llmodel_shared.cpp

@@ -4,10 +4,6 @@
 #include <iostream>
 #include <unordered_set>

-#ifdef GGML_USE_KOMPUTE
-#include "ggml-vulkan.h"
-#endif
-
 void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {
     size_t i = 0;
     promptCtx.n_past = 0;

@@ -177,26 +173,3 @@ std::vector<float> LLModel::embedding(const std::string &/*text*/)
     }
     return std::vector<float>();
 }
-
-std::vector<LLModel::GPUDevice> LLModel::availableGPUDevices()
-{
-#if defined(GGML_USE_KOMPUTE)
-    std::vector<ggml_vk_device> vkDevices = ggml_vk_available_devices(0);
-
-    std::vector<LLModel::GPUDevice> devices;
-    for(const auto& vkDevice : vkDevices) {
-        LLModel::GPUDevice device;
-        device.index = vkDevice.index;
-        device.type = vkDevice.type;
-        device.heapSize = vkDevice.heapSize;
-        device.name = vkDevice.name;
-        device.vendor = vkDevice.vendor;
-        devices.push_back(device);
-    }
-    return devices;
-#else
-    return std::vector<LLModel::GPUDevice>();
-#endif
-}
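Removing this generic implementation is the point of the change: llmodel_shared.cpp is compiled into the always-loaded llmodel library, so referencing ggml-vulkan here reintroduced a direct ggml dependency. The same mapping can instead live behind a virtual in the runtime-loaded llama backend; a sketch under the assumption of a virtual availableGPUDevices(size_t) on LLModel (inferred from the cpuLlama->availableGPUDevices(0) call in llmodel.cpp above), not the exact upstream source:

```cpp
#include <vector>
#include "llmodel.h"
#include "ggml-vulkan.h" // only the Kompute-enabled backend build includes this

class VulkanLlamaModel : public LLModel { // hypothetical backend class
public:
    std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) /* override */ {
        std::vector<GPUDevice> devices;
        // Same field-for-field mapping the removed code performed.
        for (const ggml_vk_device &vk : ggml_vk_available_devices(memoryRequired)) {
            GPUDevice d;
            d.index = vk.index;
            d.type = vk.type;
            d.heapSize = vk.heapSize;
            d.name = vk.name;
            d.vendor = vk.vendor;
            devices.push_back(d);
        }
        return devices;
    }
    // ... the backend's other LLModel overrides go here ...
};
```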

gpt4all-chat/CMakeLists.txt

@@ -173,7 +173,7 @@ else()
         PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf)
 endif()

 target_link_libraries(chat
-    PRIVATE llmodel bert-default)
+    PRIVATE llmodel)
 set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
 set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
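With the direct bert-default link gone, chat reaches every backend, bert included, through llmodel's loader, so the binary no longer hard-requires an AVX2 build of any backend. A minimal sketch using the construct() declaration from llmodel.h (the helper name is hypothetical):

```cpp
#include <memory>
#include <string>
#include "llmodel.h"

// "auto" lets llmodel pick the implementation whose magic matches the file
// and whose build variant suits the host CPU (avxonly vs. default).
std::unique_ptr<LLModel> loadAny(const std::string &modelPath) {
    return std::unique_ptr<LLModel>(LLModel::Implementation::construct(modelPath, "auto"));
}
```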

gpt4all-chat/mysettings.cpp

@@ -64,7 +64,7 @@ MySettings::MySettings()
 {
     QSettings::setDefaultFormat(QSettings::IniFormat);

-    std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();
+    std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
     QVector<QString> deviceList{ "Auto" };
     for (LLModel::GPUDevice &d : devices)
         deviceList << QString::fromStdString(d.name);