Fallback to CPU more robustly.

Adam Treat
2023-09-14 16:52:31 -04:00
parent 79843c269e
commit aa33419c6e
3 changed files with 15 additions and 4 deletions


@@ -168,6 +168,10 @@ bool LLamaModel::loadModel(const std::string &modelPath)
     d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params);
 
     if (!d_ptr->ctx) {
+#ifdef GGML_USE_KOMPUTE
+        // Explicitly free the device so the next load doesn't use it
+        ggml_vk_free_device();
+#endif
         std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
         return false;
     }
@@ -194,7 +198,7 @@ int32_t LLamaModel::threadCount() const {
 
 LLamaModel::~LLamaModel()
 {
-    if(d_ptr->ctx) {
+    if (d_ptr->ctx) {
         llama_free(d_ptr->ctx);
     }
 }
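
What the change accomplishes: when a (possibly GPU-backed) load fails, the Vulkan device is explicitly released before loadModel() returns, so a subsequent load attempt no longer binds to it. Below is a minimal sketch of the caller-side retry this enables; the loadWithFallback() wrapper and the header name are assumptions for illustration, not code from this commit, while LLamaModel::loadModel() and ggml_vk_free_device() come from the diff above.

// Hypothetical caller-side retry illustrating the fallback this commit
// makes more robust. LLamaModel is the class patched above; the wrapper
// and header name are assumptions, not part of the commit.
#include <iostream>
#include <string>
#include "llamamodel.h" // assumed header for LLamaModel

bool loadWithFallback(LLamaModel &model, const std::string &modelPath)
{
    // First attempt: may run on the Kompute/Vulkan device if one is available.
    if (model.loadModel(modelPath))
        return true;

    // On failure, loadModel() has already called ggml_vk_free_device()
    // (under GGML_USE_KOMPUTE), so this second attempt no longer binds
    // to the stale device and can initialize on the CPU backend.
    std::cerr << "GPU load failed, retrying on CPU: " << modelPath << std::endl;
    return model.loadModel(modelPath);
}

Without the explicit ggml_vk_free_device() call, a retry could still pick up the previously claimed device, which appears to be what the commit message means by making the CPU fallback "more robust".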