Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-09-06 11:00:48 +00:00)
support the llama.cpp CUDA backend (#2310)
* rebase onto llama.cpp commit ggerganov/llama.cpp@d46dbc76f
* support for CUDA backend (enabled by default)
* partial support for Occam's Vulkan backend (disabled by default)
* partial support for HIP/ROCm backend (disabled by default)
* sync llama.cpp.cmake with upstream llama.cpp CMakeLists.txt
* changes to GPT4All backend, bindings, and chat UI to handle choice of llama.cpp backend (Kompute or CUDA)
* ship CUDA runtime with installed version
* make device selection in the UI on macOS actually do something
* model whitelist: remove dbrx, mamba, persimmon, plamo; add internlm and starcoder2

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
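As a rough illustration of the backend choice described above, the sketch below shows how a C caller might create a model handle with an explicit backend string via llmodel_model_create2. The "cuda" value, the model path, and the cleanup with llmodel_model_destroy are illustrative assumptions, not taken from this diff.

/* Hypothetical usage sketch; "cuda" and "kompute" as backend strings are
 * assumed from the commit message, not confirmed constants. */
#include <stdio.h>
#include "llmodel_c.h"

int main(void) {
    const char *error = NULL;
    /* The second argument, formerly a build variant, now names the llama.cpp backend. */
    llmodel_model model = llmodel_model_create2("./model.gguf", "cuda", &error);
    if (model == NULL) {
        fprintf(stderr, "model creation failed: %s\n", error);
        return 1;
    }
    /* ... load a model file and run inference here ... */
    llmodel_model_destroy(model);
    return 0;
}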
@@ -31,10 +31,10 @@ static void llmodel_set_error(const char **errptr, const char *message) {
     }
 }
 
-llmodel_model llmodel_model_create2(const char *model_path, const char *build_variant, const char **error) {
+llmodel_model llmodel_model_create2(const char *model_path, const char *backend, const char **error) {
     LLModel *llModel;
     try {
-        llModel = LLModel::Implementation::construct(model_path, build_variant);
+        llModel = LLModel::Implementation::construct(model_path, backend);
     } catch (const std::exception& e) {
         llmodel_set_error(error, e.what());
         return nullptr;
@@ -248,6 +248,7 @@ struct llmodel_gpu_device *llmodel_available_gpu_devices(size_t memoryRequired,
     for (unsigned i = 0; i < devices.size(); i++) {
         const auto &dev = devices[i];
         auto &cdev = c_devices[i];
+        cdev.backend = dev.backend;
         cdev.index = dev.index;
         cdev.type = dev.type;
         cdev.heapSize = dev.heapSize;
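The second hunk propagates the new backend field of llmodel_gpu_device through the C API. A minimal sketch of how a frontend might enumerate devices and report which backend each one belongs to, assuming an int* out-parameter for the device count, a C-string backend field, and an unsigned heapSize; only the backend, index, type, and heapSize fields are visible in the diff.

/* Sketch only: list available GPUs and which backend (e.g. kompute or cuda)
 * reports them.  The num_devices out-parameter and field types are assumptions. */
#include <stdio.h>
#include "llmodel_c.h"

static void print_gpu_devices(void) {
    int num_devices = 0;
    struct llmodel_gpu_device *devices =
        llmodel_available_gpu_devices(0 /* no minimum memory requirement */, &num_devices);
    for (int i = 0; i < num_devices; i++) {
        printf("device %d: backend=%s heapSize=%llu\n",
               devices[i].index,
               devices[i].backend,
               (unsigned long long)devices[i].heapSize);
    }
}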