chat: fix blank device in UI and improve Mixpanel reporting (#2409)

Also remove LLModel::hasGPUDevice.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel
Date: 2024-06-26 15:26:27 -04:00
Committed by: GitHub
Commit: 01870b4a46 (parent 53fc2d56f6)

13 changed files with 146 additions and 91 deletions


@@ -371,6 +371,11 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
         d_ptr->model_params.main_gpu = d_ptr->device;
         d_ptr->model_params.n_gpu_layers = ngl;
         d_ptr->model_params.split_mode = LLAMA_SPLIT_MODE_NONE;
+    } else {
+#ifdef GGML_USE_CUDA
+        std::cerr << "Llama ERROR: CUDA loadModel was called without a device\n";
+        return false;
+#endif // GGML_USE_CUDA
     }
 #elif defined(GGML_USE_METAL)
     (void)ngl;
@@ -383,15 +388,17 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     // always fully offload on Metal
     // TODO(cebtenzzre): use this parameter to allow using more than 53% of system RAM to load a model
     d_ptr->model_params.n_gpu_layers = 100;
-#else
+#else // !KOMPUTE && !VULKAN && !CUDA && !METAL
     (void)ngl;
 #endif

-    d_ptr->model = llama_load_model_from_file_gpt4all(modelPath.c_str(), &d_ptr->model_params);
+    d_ptr->model = llama_load_model_from_file(modelPath.c_str(), d_ptr->model_params);
     if (!d_ptr->model) {
         fflush(stdout);
+#ifndef GGML_USE_CUDA
         d_ptr->device = -1;
         d_ptr->deviceName.clear();
+#endif
         std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
         return false;
     }
@@ -434,8 +441,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
         std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl;
         llama_free_model(d_ptr->model);
         d_ptr->model = nullptr;
+#ifndef GGML_USE_CUDA
         d_ptr->device = -1;
         d_ptr->deviceName.clear();
+#endif
         return false;
     }
@@ -723,31 +732,16 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) co
 #endif
 }

-bool LLamaModel::hasGPUDevice() const
-{
-#if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
-    return d_ptr->device != -1;
-#else
-    return false;
-#endif
-}
-
 bool LLamaModel::usingGPUDevice() const
 {
-    bool hasDevice;
+    if (!d_ptr->model)
+        return false;
+
+    bool usingGPU = llama_model_using_gpu(d_ptr->model);
 #ifdef GGML_USE_KOMPUTE
-    hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
-    assert(!hasDevice || ggml_vk_has_device());
-#elif defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
-    hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
-#elif defined(GGML_USE_METAL)
-    hasDevice = true;
-#else
-    hasDevice = false;
+    assert(!usingGPU || ggml_vk_has_device());
 #endif
-    return hasDevice;
+    return usingGPU;
 }

 const char *LLamaModel::backendName() const
@@ -760,6 +754,8 @@ const char *LLamaModel::gpuDeviceName() const
     if (usingGPUDevice()) {
 #if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
         return d_ptr->deviceName.c_str();
+#elif defined(GGML_USE_METAL)
+        return "Metal";
 #endif
     }
     return nullptr;
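
Note on migrating callers (not part of this commit): with LLModel::hasGPUDevice() removed, code that reports the compute device should rely on usingGPUDevice() together with gpuDeviceName() and backendName(), which this diff keeps. The sketch below is a hypothetical illustration of that pattern; LLModelLike and describeDevice are stand-ins invented for this example, not names from the gpt4all codebase.

#include <string>

// Hypothetical stand-in for the slice of the LLModel interface used here;
// the real LLModel class in the gpt4all backend has many more members.
struct LLModelLike {
    virtual ~LLModelLike() = default;
    virtual bool usingGPUDevice() const = 0;        // true when a loaded model is running on a GPU backend
    virtual const char *backendName() const = 0;    // short backend identifier (assumed non-null here)
    virtual const char *gpuDeviceName() const = 0;  // device name, or nullptr when no GPU is in use
};

// Build a device string for the UI / telemetry so it is never blank:
// prefer the concrete device name, then a generic backend label, then "CPU".
std::string describeDevice(const LLModelLike &model)
{
    if (!model.usingGPUDevice())
        return "CPU";
    if (const char *name = model.gpuDeviceName())
        return name;
    return std::string("GPU (") + model.backendName() + ")";
}

Because gpuDeviceName() now also returns "Metal" on the Metal backend, the generic fallback branch should rarely be needed.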