diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 847006c8..2551bc27 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -301,8 +301,9 @@ bool LLamaModel::initializeGPUDevice(size_t memoryRequired, const std::string& d
 #endif
 }
 
-bool LLamaModel::initializeGPUDevice(const LLModel::GPUDevice &device)
+bool LLamaModel::initializeGPUDevice(const LLModel::GPUDevice &device, std::string *unavail_reason)
 {
+    bool result = false;
 #if defined(GGML_USE_KOMPUTE)
     ggml_vk_device vkDevice;
     vkDevice.index = device.index;
@@ -310,10 +311,16 @@ bool LLamaModel::initializeGPUDevice(const LLModel::GPUDevice &device)
     vkDevice.heapSize = device.heapSize;
     vkDevice.name = device.name;
     vkDevice.vendor = device.vendor;
-    return ggml_vk_init_device(vkDevice);
+    result = ggml_vk_init_device(vkDevice);
+    if (!result && unavail_reason) {
+        *unavail_reason = "failed to init device";
+    }
 #else
-    return false;
+    if (unavail_reason) {
+        *unavail_reason = "built without kompute";
+    }
 #endif
+    return result;
 }
 
 bool LLamaModel::initializeGPUDevice(int device)
diff --git a/gpt4all-backend/llamamodel_impl.h b/gpt4all-backend/llamamodel_impl.h
index 943437ee..d708ddac 100644
--- a/gpt4all-backend/llamamodel_impl.h
+++ b/gpt4all-backend/llamamodel_impl.h
@@ -27,7 +27,7 @@ public:
     int32_t threadCount() const override;
     std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) override;
     bool initializeGPUDevice(size_t memoryRequired, const std::string& device) override;
-    bool initializeGPUDevice(const GPUDevice &device) override;
+    bool initializeGPUDevice(const GPUDevice &device, std::string *unavail_reason) override;
     bool initializeGPUDevice(int device) override;
     bool hasGPUDevice() override;
     bool usingGPUDevice() override;
diff --git a/gpt4all-backend/llmodel.h b/gpt4all-backend/llmodel.h
index af3e3ff2..ad33c82d 100644
--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/llmodel.h
@@ -97,7 +97,12 @@ public:
 
     virtual std::vector<GPUDevice> availableGPUDevices(size_t /*memoryRequired*/) { return std::vector<GPUDevice>(); }
     virtual bool initializeGPUDevice(size_t /*memoryRequired*/, const std::string& /*device*/) { return false; }
-    virtual bool initializeGPUDevice(const GPUDevice &/*device*/) { return false; }
+    virtual bool initializeGPUDevice(const GPUDevice &/*device*/, std::string *unavail_reason = nullptr) {
+        if (unavail_reason) {
+            *unavail_reason = "unsupported model type";
+        }
+        return false;
+    }
     virtual bool initializeGPUDevice(int /*device*/) { return false; }
     virtual bool hasGPUDevice() { return false; }
     virtual bool usingGPUDevice() { return false; }
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index c7e1208d..835a58a3 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -282,11 +282,14 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     }
                 }
 
-                if (!device || !m_llModelInfo.model->initializeGPUDevice(*device)) {
-                    emit reportFallbackReason("<br>Using CPU: failed to init device");
+                emit reportFallbackReason(""); // no fallback yet
+                std::string unavail_reason;
+                if (!device) {
+                    // GPU not available
+                } else if (!m_llModelInfo.model->initializeGPUDevice(*device, &unavail_reason)) {
+                    emit reportFallbackReason(QString::fromStdString("<br>Using CPU: " + unavail_reason));
                 } else {
                     actualDevice = QString::fromStdString(device->name);
-                    emit reportFallbackReason(""); // no fallback yet
                 }
             }
 
@@ -306,6 +309,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             // We might have had to fallback to CPU after load if the model is not possible to accelerate
             // for instance if the quantization method is not supported on Vulkan yet
             emit reportDevice("CPU");
+            // TODO(cebtenzzre): report somewhere if llamamodel decided the model was not supported
            emit reportFallbackReason("<br>Using CPU: unsupported quantization type");
         }
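
For reference, a minimal self-contained sketch of the calling convention this patch introduces: the callee fills an optional std::string out-parameter with a human-readable reason whenever GPU initialization fails, and the caller surfaces that string as the CPU-fallback message. DemoModel and the main() harness below are hypothetical stand-ins, not code from the repository; only the initializeGPUDevice() signature mirrors the diff above.

// Standalone sketch (not part of the patch) of the unavail_reason contract.
#include <iostream>
#include <string>

struct GPUDevice { int index = 0; std::string name; };

struct DemoModel {
    // Mirrors the new base-class default in llmodel.h: fill the reason (if
    // requested) and return false when the backend cannot use the device.
    virtual bool initializeGPUDevice(const GPUDevice &/*device*/,
                                     std::string *unavail_reason = nullptr) {
        if (unavail_reason)
            *unavail_reason = "unsupported model type";
        return false;
    }
    virtual ~DemoModel() = default;
};

int main() {
    DemoModel model;
    GPUDevice device{0, "ExampleGPU"};

    std::string unavail_reason;
    if (!model.initializeGPUDevice(device, &unavail_reason)) {
        // Analogous to ChatLLM::loadModel() emitting reportFallbackReason().
        std::cout << "Using CPU: " << unavail_reason << "\n";
    }
    return 0;
}

Because the parameter defaults to nullptr, existing callers that ignore the reason keep compiling unchanged; only chatllm.cpp opts in to display it.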