Mirror of https://github.com/nomic-ai/gpt4all.git, synced 2025-09-07 11:30:05 +00:00

chat: fix blank device in UI and improve Mixpanel reporting (#2409)
Also remove LLModel::hasGPUDevice.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Submodule gpt4all-backend/llama.cpp-mainline updated: f67f4651fa...b2db03acf2
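
The diff below drops LLModel::hasGPUDevice() and reworks GPU reporting around usingGPUDevice(), gpuDeviceName(), and backendName(). For orientation, a minimal sketch (not part of the commit) of how a caller of the interface can report the active device without the removed method; the include path and helper name are illustrative only:

#include <string>

#include "llmodel.h" // assumed location of the LLModel interface

// Sketch: device reporting without the removed hasGPUDevice().
static std::string reportedDevice(const LLModel &model)
{
    if (!model.usingGPUDevice())
        return "CPU";
    const char *name = model.gpuDeviceName(); // adapter name, or "Metal" on Apple GPUs
    return name ? name : model.backendName();
}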
@@ -371,6 +371,11 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
        d_ptr->model_params.main_gpu = d_ptr->device;
        d_ptr->model_params.n_gpu_layers = ngl;
        d_ptr->model_params.split_mode = LLAMA_SPLIT_MODE_NONE;
    } else {
#ifdef GGML_USE_CUDA
        std::cerr << "Llama ERROR: CUDA loadModel was called without a device\n";
        return false;
#endif // GGML_USE_CUDA
    }
#elif defined(GGML_USE_METAL)
    (void)ngl;
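
With the new else branch above, a CUDA build now prints an error and returns false from loadModel() when no device was selected beforehand. A minimal sketch of the calling order this implies (not code from the repository; `model` is an assumed LLModel pointer, and the path, context size, layer count, and device index are placeholders):

// Sketch: with the CUDA backend, pick a device before calling loadModel().
std::string unavailReason;
if (!model->initializeGPUDevice(0, &unavailReason))
    std::cerr << "GPU 0 unavailable: " << unavailReason << "\n";

// Under CUDA, this now fails up front if no device was initialized.
if (!model->loadModel("/path/to/model.gguf", /*n_ctx*/ 2048, /*ngl*/ 100))
    std::cerr << "model load failed\n";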
@@ -383,15 +388,17 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
    // always fully offload on Metal
    // TODO(cebtenzzre): use this parameter to allow using more than 53% of system RAM to load a model
    d_ptr->model_params.n_gpu_layers = 100;
#else
#else // !KOMPUTE && !VULKAN && !CUDA && !METAL
    (void)ngl;
#endif

    d_ptr->model = llama_load_model_from_file_gpt4all(modelPath.c_str(), &d_ptr->model_params);
    d_ptr->model = llama_load_model_from_file(modelPath.c_str(), d_ptr->model_params);
    if (!d_ptr->model) {
        fflush(stdout);
#ifndef GGML_USE_CUDA
        d_ptr->device = -1;
        d_ptr->deviceName.clear();
#endif
        std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
        return false;
    }
@@ -434,8 +441,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
        std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl;
        llama_free_model(d_ptr->model);
        d_ptr->model = nullptr;
#ifndef GGML_USE_CUDA
        d_ptr->device = -1;
        d_ptr->deviceName.clear();
#endif
        return false;
    }

@@ -723,31 +732,16 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) const
#endif
}

bool LLamaModel::hasGPUDevice() const
{
#if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
    return d_ptr->device != -1;
#else
    return false;
#endif
}

bool LLamaModel::usingGPUDevice() const
{
    bool hasDevice;
    if (!d_ptr->model)
        return false;

    bool usingGPU = llama_model_using_gpu(d_ptr->model);
#ifdef GGML_USE_KOMPUTE
    hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
    assert(!hasDevice || ggml_vk_has_device());
#elif defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
    hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
#elif defined(GGML_USE_METAL)
    hasDevice = true;
#else
    hasDevice = false;
    assert(!usingGPU || ggml_vk_has_device());
#endif

    return hasDevice;
    return usingGPU;
}

const char *LLamaModel::backendName() const
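
After this rewrite, usingGPUDevice() no longer infers GPU use from the device index and n_gpu_layers; it returns false while no model is loaded and otherwise asks llama.cpp directly through llama_model_using_gpu(). A small sketch of the properties a caller can rely on (illustrative helper, not from the repository):

#include <cassert>

#include "llmodel.h" // assumed location of the LLModel interface

// Sketch: GPU reporting after this change.
static void checkGpuReporting(const LLModel &model, bool loadSucceeded)
{
    if (!loadSucceeded) {
        assert(!model.usingGPUDevice());          // no loaded model, so no GPU is reported
        assert(model.gpuDeviceName() == nullptr); // and no device name either
    } else if (model.usingGPUDevice()) {
        assert(model.gpuDeviceName() != nullptr); // llama.cpp says offloaded, so a name is available
    }
}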
@@ -760,6 +754,8 @@ const char *LLamaModel::gpuDeviceName() const
    if (usingGPUDevice()) {
#if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
        return d_ptr->deviceName.c_str();
#elif defined(GGML_USE_METAL)
        return "Metal";
#endif
    }
    return nullptr;

@@ -34,7 +34,6 @@ public:
    std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired = 0) const override;
    bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override;
    bool initializeGPUDevice(int device, std::string *unavail_reason = nullptr) const override;
    bool hasGPUDevice() const override;
    bool usingGPUDevice() const override;
    const char *backendName() const override;
    const char *gpuDeviceName() const override;

@@ -2,6 +2,7 @@
#define LLMODEL_H

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
@@ -57,23 +58,30 @@ public:
            backend(backend), index(index), type(type), heapSize(heapSize), name(std::move(name)),
            vendor(std::move(vendor)) {}

        std::string selectionName() const { return m_backendNames.at(backend) + ": " + name; }
        std::string reportedName() const { return name + " (" + m_backendNames.at(backend) + ")"; }
        std::string selectionName() const
        {
            assert(backend == "cuda"s || backend == "kompute"s);
            return backendName() + ": " + name;
        }

        std::string backendName() const { return backendIdToName(backend); }

        static std::string backendIdToName(const std::string &backend) { return s_backendNames.at(backend); }

        static std::string updateSelectionName(const std::string &name) {
            if (name == "Auto" || name == "CPU" || name == "Metal")
                return name;
            auto it = std::find_if(m_backendNames.begin(), m_backendNames.end(), [&name](const auto &entry) {
            auto it = std::find_if(s_backendNames.begin(), s_backendNames.end(), [&name](const auto &entry) {
                return name.starts_with(entry.second + ": ");
            });
            if (it != m_backendNames.end())
            if (it != s_backendNames.end())
                return name;
            return "Vulkan: " + name; // previously, there were only Vulkan devices
        }

    private:
        static inline const std::unordered_map<std::string, std::string> m_backendNames {
            {"cuda", "CUDA"}, {"kompute", "Vulkan"},
        static inline const std::unordered_map<std::string, std::string> s_backendNames {
            {"cpu", "CPU"}, {"metal", "Metal"}, {"cuda", "CUDA"}, {"kompute", "Vulkan"},
        };
    };

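
The new GPUDevice helpers centralize how backend ids map to the display names used in device selection strings. A short sketch of what they produce (illustrative checks, not from the repository; the device name is made up):

#include <cassert>
#include <string>

#include "llmodel.h" // assumed location of LLModel::GPUDevice

// Sketch: expected behaviour of the new static name helpers.
static void checkSelectionNames()
{
    using GPUDevice = LLModel::GPUDevice;
    assert(GPUDevice::backendIdToName("kompute") == "Vulkan");
    assert(GPUDevice::backendIdToName("cuda") == "CUDA");

    // Already-prefixed values and the special strings pass through unchanged...
    assert(GPUDevice::updateSelectionName("Auto") == "Auto");
    assert(GPUDevice::updateSelectionName("CUDA: NVIDIA GeForce RTX 3060") == "CUDA: NVIDIA GeForce RTX 3060");
    // ...while a bare device name saved by an older release is assumed to be Vulkan.
    assert(GPUDevice::updateSelectionName("NVIDIA GeForce RTX 3060") == "Vulkan: NVIDIA GeForce RTX 3060");
}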
@@ -196,7 +204,6 @@ public:
        return false;
    }

    virtual bool hasGPUDevice() const { return false; }
    virtual bool usingGPUDevice() const { return false; }
    virtual const char *backendName() const { return "cpu"; }
    virtual const char *gpuDeviceName() const { return nullptr; }

@@ -287,12 +287,6 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)
    return wrapper->llModel->initializeGPUDevice(device);
}

bool llmodel_has_gpu_device(llmodel_model model)
{
    const auto *wrapper = static_cast<LLModelWrapper *>(model);
    return wrapper->llModel->hasGPUDevice();
}

const char *llmodel_model_backend_name(llmodel_model model)
{
    const auto *wrapper = static_cast<LLModelWrapper *>(model);

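
Bindings that called the removed llmodel_has_gpu_device() can approximate it with the remaining C API. A minimal sketch, assuming a loaded llmodel_model handle and treating any backend other than "cpu" as GPU-backed (that equivalence is an assumption, not something the header promises):

#include <cstring>

#include "llmodel_c.h" // assumed location of the C API header

// Sketch: rough replacement for the removed llmodel_has_gpu_device().
static bool modelOnGpu(llmodel_model model)
{
    const char *backend = llmodel_model_backend_name(model); // e.g. "cpu", "kompute", "metal"
    return backend != nullptr && std::strcmp(backend, "cpu") != 0;
}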
@@ -291,11 +291,6 @@ bool llmodel_gpu_init_gpu_device_by_struct(llmodel_model model, const llmodel_gp
 */
bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device);

/**
 * @return True if a GPU device is successfully initialized, false otherwise.
 */
bool llmodel_has_gpu_device(llmodel_model model);

/**
 * @return The name of the llama.cpp backend currently in use. One of "cpu", "kompute", or "metal".
 */