fix AVX support by removing direct linking to AVX2 libs (#1750)

Jared Van Bortel 2023-12-13 12:11:09 -05:00, committed by GitHub
parent 0600f551b3, commit 3acbef14b7
6 changed files with 36 additions and 42 deletions

gpt4all-backend/CMakeLists.txt

@@ -114,8 +114,6 @@ add_library(llmodel
     llmodel_c.h llmodel_c.cpp
     dlhandle.h
 )
-target_link_libraries(llmodel PRIVATE ggml-mainline-default)
-target_compile_definitions(llmodel PRIVATE GGML_BUILD_VARIANT="default")
 target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")
 set_target_properties(llmodel PROPERTIES
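Why this is safe to remove: llmodel discovers its backend libraries at runtime through dlhandle, so only the variant matching the host CPU is ever mapped into the process, while a hard link to ggml-mainline-default forced AVX2 code in regardless. A minimal sketch of the pattern (plain dlopen with hypothetical library and symbol names, not the project's dlhandle API):

```cpp
// Sketch only: illustrates runtime backend selection, not gpt4all's exact code.
#include <dlfcn.h>
#include <iostream>
#include <string>

int main() {
    // llmodel.cpp decides this via requires_avxonly(); hardcoded here for illustration.
    bool avxonly = true; // pretend the host CPU lacks AVX2
    std::string lib = std::string("libllamamodel-mainline-") +
                      (avxonly ? "avxonly" : "default") + ".so";

    void *handle = dlopen(lib.c_str(), RTLD_NOW | RTLD_LOCAL);
    if (!handle) {
        std::cerr << "dlopen failed: " << dlerror() << '\n';
        return 1;
    }
    // Real implementations export entry points that llmodel resolves by name;
    // "construct" is a stand-in symbol here.
    using construct_t = void *(*)();
    auto construct = reinterpret_cast<construct_t>(dlsym(handle, "construct"));
    if (construct)
        std::cout << "resolved construct() from " << lib << '\n';
    dlclose(handle);
}
```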

gpt4all-backend/llmodel.cpp

@@ -82,7 +82,7 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementationList()
     static auto* libs = new std::vector<Implementation>([] () {
         std::vector<Implementation> fres;
-        std::string impl_name_re = "(bert|llama|gptj|llamamodel-mainline)";
+        std::string impl_name_re = "(bert|gptj|llamamodel-mainline)";
         if (requires_avxonly()) {
             impl_name_re += "-avxonly";
         } else {

@@ -186,6 +186,27 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant)
     return fres;
 }

+LLModel *LLModel::Implementation::constructCpuLlama() {
+    const LLModel::Implementation *impl = nullptr;
+    for (const auto &i : implementationList()) {
+        if (i.m_buildVariant == "metal" || i.m_modelType != "LLaMA") continue;
+        impl = &i;
+    }
+    if (!impl) {
+        std::cerr << "LLModel ERROR: Could not find CPU LLaMA implementation\n";
+        return nullptr;
+    }
+    auto fres = impl->m_construct();
+    fres->m_implementation = impl;
+    return fres;
+}
+
+std::vector<LLModel::GPUDevice> LLModel::Implementation::availableGPUDevices() {
+    static LLModel *cpuLlama = LLModel::Implementation::constructCpuLlama(); // (memory leak)
+    if (cpuLlama) { return cpuLlama->availableGPUDevices(0); }
+    return {};
+}
+
 void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) {
     s_implementations_search_path = path;
 }
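The new static goes through a lazily constructed CPU LLaMA instance; the function-local static means it is built once and deliberately never freed, as the (memory leak) comment flags. A hypothetical caller, assuming only the declarations this commit adds to llmodel.h:

```cpp
#include <iostream>
#include "llmodel.h"

int main() {
    // Enumerate GPUs via the runtime-loaded llama backend; empty if none found.
    for (const LLModel::GPUDevice &d : LLModel::Implementation::availableGPUDevices())
        std::cout << d.index << ": " << d.vendor << ' ' << d.name
                  << " (" << d.heapSize / (1024 * 1024) << " MiB)\n"; // heapSize assumed to be bytes
}
```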

gpt4all-backend/llmodel.h

@@ -15,6 +15,15 @@ class Dlhandle;
 class LLModel {
 public:
     using Token = int32_t;
+
+    struct GPUDevice {
+        int index = 0;
+        int type = 0;
+        size_t heapSize = 0;
+        std::string name;
+        std::string vendor;
+    };
+
     class Implementation {
     public:
         Implementation(Dlhandle&&);

@@ -29,14 +38,16 @@ public:
         static const std::vector<Implementation>& implementationList();
         static const Implementation *implementation(const char *fname, const std::string& buildVariant);
         static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
+        static std::vector<GPUDevice> availableGPUDevices();
         static void setImplementationsSearchPath(const std::string& path);
         static const std::string& implementationsSearchPath();

     private:
+        static LLModel *constructCpuLlama();
+
         bool (*m_magicMatch)(const char *fname);
         LLModel *(*m_construct)();
-
-    private:
         std::string_view m_modelType;
         std::string_view m_buildVariant;
         Dlhandle *m_dlhandle;

@@ -58,14 +69,6 @@ public:
         int32_t n_last_batch_tokens = 0;
     };

-    struct GPUDevice {
-        int index = 0;
-        int type = 0;
-        size_t heapSize = 0;
-        std::string name;
-        std::string vendor;
-    };
-
     explicit LLModel() {}
     virtual ~LLModel() {}

@@ -106,7 +109,6 @@ public:
     virtual bool initializeGPUDevice(int /*device*/) { return false; }
     virtual bool hasGPUDevice() { return false; }
     virtual bool usingGPUDevice() { return false; }
-    static std::vector<GPUDevice> availableGPUDevices();

 protected:
     // These are pure virtual because subclasses need to implement as the default implementation of
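Net effect on callers: GPUDevice moves ahead of the nested Implementation class so the new static can name it, and device enumeration is reached through Implementation instead of LLModel itself. The one-line migration (applied to mysettings.cpp at the end of this commit):

```cpp
// Before (removed by this commit):
// std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();

// After:
std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
```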

gpt4all-backend/llmodel_shared.cpp

@@ -4,10 +4,6 @@
 #include <iostream>
 #include <unordered_set>

-#ifdef GGML_USE_KOMPUTE
-#include "ggml-vulkan.h"
-#endif
-
 void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {
     size_t i = 0;
     promptCtx.n_past = 0;

@@ -177,26 +173,3 @@ std::vector<float> LLModel::embedding(const std::string &/*text*/)
     }
     return std::vector<float>();
 }
-
-std::vector<LLModel::GPUDevice> LLModel::availableGPUDevices()
-{
-#if defined(GGML_USE_KOMPUTE)
-    std::vector<ggml_vk_device> vkDevices = ggml_vk_available_devices(0);
-
-    std::vector<LLModel::GPUDevice> devices;
-    for(const auto& vkDevice : vkDevices) {
-        LLModel::GPUDevice device;
-        device.index = vkDevice.index;
-        device.type = vkDevice.type;
-        device.heapSize = vkDevice.heapSize;
-        device.name = vkDevice.name;
-        device.vendor = vkDevice.vendor;
-        devices.push_back(device);
-    }
-    return devices;
-#else
-    return std::vector<LLModel::GPUDevice>();
-#endif
-}
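Removing this generic implementation is the point of the change: llmodel_shared.cpp is compiled into the always-loaded llmodel library, so referencing ggml-vulkan here reintroduced a direct ggml dependency. The same mapping can instead live behind a virtual in the runtime-loaded llama backend; a sketch under the assumption of a virtual availableGPUDevices(size_t) on LLModel (inferred from the cpuLlama->availableGPUDevices(0) call in llmodel.cpp above), not the exact upstream source:

```cpp
#include <vector>
#include "llmodel.h"
#include "ggml-vulkan.h" // only the Kompute-enabled backend build includes this

class VulkanLlamaModel : public LLModel { // hypothetical backend class
public:
    std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) /* override */ {
        std::vector<GPUDevice> devices;
        // Same field-for-field mapping the removed code performed.
        for (const ggml_vk_device &vk : ggml_vk_available_devices(memoryRequired)) {
            GPUDevice d;
            d.index = vk.index;
            d.type = vk.type;
            d.heapSize = vk.heapSize;
            d.name = vk.name;
            d.vendor = vk.vendor;
            devices.push_back(d);
        }
        return devices;
    }
    // ... the backend's other LLModel overrides go here ...
};
```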

gpt4all-chat/CMakeLists.txt

@@ -173,7 +173,7 @@ else()
         PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf)
 endif()

 target_link_libraries(chat
-    PRIVATE llmodel bert-default)
+    PRIVATE llmodel)
 set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
 set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
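With the direct bert-default link gone, chat reaches every backend, bert included, through llmodel's loader, so the binary no longer hard-requires an AVX2 build of any backend. A minimal sketch using the construct() declaration from llmodel.h (the helper name is hypothetical):

```cpp
#include <memory>
#include <string>
#include "llmodel.h"

// "auto" lets llmodel pick the implementation whose magic matches the file
// and whose build variant suits the host CPU (avxonly vs. default).
std::unique_ptr<LLModel> loadAny(const std::string &modelPath) {
    return std::unique_ptr<LLModel>(LLModel::Implementation::construct(modelPath, "auto"));
}
```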

gpt4all-chat/mysettings.cpp

@@ -64,7 +64,7 @@ MySettings::MySettings()
 {
     QSettings::setDefaultFormat(QSettings::IniFormat);

-    std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();
+    std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
     QVector<QString> deviceList{ "Auto" };
     for (LLModel::GPUDevice &d : devices)
         deviceList << QString::fromStdString(d.name);