mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-11-13 23:02:38 +00:00)
Fallback to CPU more robustly.
Submodule gpt4all-backend/llama.cpp-mainline updated: 7ff671e149...703ef9c125
@@ -168,6 +168,10 @@ bool LLamaModel::loadModel(const std::string &modelPath)
     d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params);
     if (!d_ptr->ctx) {
+#ifdef GGML_USE_KOMPUTE
+        // Explicitly free the device so next load it doesn't use it
+        ggml_vk_free_device();
+#endif
         std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
         return false;
     }
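The hunk above is the backend half of the fallback: when a GPU-backed load fails, the Kompute/Vulkan device is released before returning, so a follow-up attempt is free to bind elsewhere. Below is a minimal sketch of that error path, assuming the old-style llama.cpp API used here (llama_init_from_file) and the Kompute fork's ggml_vk_free_device(), both of which appear in the diff; the wrapper name is hypothetical, not part of the project.

#include <iostream>
#include <string>
#include "llama.h"  // old-style API: llama_init_from_file / llama_context_params

// Hypothetical helper mirroring the error path above: if the load fails while a
// Kompute/Vulkan device is claimed, free it so the next attempt (for example the
// CPU retry driven from chatllm.cpp) does not find the device still in use.
static llama_context *tryLoad(const std::string &modelPath, llama_context_params params)
{
    llama_context *ctx = llama_init_from_file(modelPath.c_str(), params);
    if (!ctx) {
#ifdef GGML_USE_KOMPUTE
        ggml_vk_free_device(); // declared by the Kompute-enabled ggml headers
#endif
        std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
    }
    return ctx;
}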
@@ -194,7 +198,7 @@ int32_t LLamaModel::threadCount() const {
 LLamaModel::~LLamaModel()
 {
-    if(d_ptr->ctx) {
+    if (d_ptr->ctx) {
         llama_free(d_ptr->ctx);
     }
 }
@@ -294,9 +294,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             emit reportDevice(actualDevice);
 
             bool success = m_llModelInfo.model->loadModel(filePath.toStdString());
+            if (!success && actualDevice != "CPU") {
+                emit reportDevice("CPU");
+                success = m_llModelInfo.model->loadModel(filePath.toStdString());
+            }
+
             MySettings::globalInstance()->setAttemptModelLoad(QString());
             if (!success) {
-                delete std::exchange(m_llModelInfo.model, nullptr);
+                delete m_llModelInfo.model;
+                m_llModelInfo.model = nullptr;
                 if (!m_isServer)
                     LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
                 m_llModelInfo = LLModelInfo();
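The caller-side half of the change is in the hunk above: the first load attempt goes to the requested device, and if it fails anywhere other than the CPU, the UI is told about the switch via reportDevice("CPU") and the load is retried. A self-contained sketch of that retry shape, using hypothetical names rather than the gpt4all classes:

#include <iostream>
#include <string>

// Stand-in for LLModel::loadModel(); pretend only the CPU path succeeds so the
// example exercises the fallback branch.
static bool loadModelOn(const std::string &device, const std::string &path)
{
    std::cout << "loading " << path << " on " << device << "\n";
    return device == "CPU";
}

// Mirrors the new control flow in ChatLLM::loadModel(): try the requested device,
// then fall back to the CPU (announcing the switch first) if that attempt failed.
static bool loadWithCpuFallback(const std::string &requestedDevice, const std::string &path)
{
    bool success = loadModelOn(requestedDevice, path);
    if (!success && requestedDevice != "CPU") {
        std::cout << "falling back to CPU\n"; // ChatLLM emits reportDevice("CPU") here
        success = loadModelOn("CPU", path);
    }
    return success;
}

int main()
{
    return loadWithCpuFallback("Vulkan", "model.bin") ? 0 : 1;
}

Note that the device change is reported before the retry, so the UI reflects the CPU fallback even if the second attempt also fails.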
@@ -317,7 +323,8 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             case 'S': m_llModelType = LLModelType::STARCODER_; break;
             default:
                 {
-                    delete std::exchange(m_llModelInfo.model, nullptr);
+                    delete m_llModelInfo.model;
+                    m_llModelInfo.model = nullptr;
                     if (!m_isServer)
                         LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
                     m_llModelInfo = LLModelInfo();