chat: fix blank device in UI and improve Mixpanel reporting (#2409)
Also remove LLModel::hasGPUDevice.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
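The hunks below retire the eager reportDevice()/reportFallbackReason() signals: the fallback reason now lives on LLModelInfo, every change to the model pointer goes through a helper that emits loadedModelInfoChanged(), and the UI derives the device string from the loaded model on demand, so a reload can no longer leave the device field blank. The one-argument calls m_llModelInfo.resetModel(this) below imply a nullptr default for the second parameter in the header, which is not part of this diff. A minimal sketch of the derive-on-demand pattern (class and member names are illustrative, not the project's exact API):

    #include <QObject>
    #include <QString>

    // Sketch only: a QML-facing property recomputed from current model state
    // each time loadedModelInfoChanged() fires, instead of caching a string
    // that an eager emit once pushed at the UI.
    class ModelStatus : public QObject {
        Q_OBJECT
        Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged)

    public:
        QString device() const {
            if (!m_modelLoaded)
                return QString();  // nothing loaded: show nothing, never a stale value
            return m_usingGpu ? m_gpuName : QStringLiteral("CPU");
        }

    signals:
        void loadedModelInfoChanged();  // fired on every load/unload/reset path

    private:
        bool m_modelLoaded = false;  // stands in for LLModelInfo::model
        bool m_usingGpu = false;
        QString m_gpuName;           // e.g. the reported Vulkan or Metal device name
    };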
@@ -93,6 +93,12 @@ void LLModelStore::destroy()
     m_availableModel.reset();
 }
 
+void LLModelInfo::resetModel(ChatLLM *cllm, LLModel *model) {
+    this->model.reset(model);
+    fallbackReason.reset();
+    emit cllm->loadedModelInfoChanged();
+}
+
 ChatLLM::ChatLLM(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
@@ -141,7 +147,7 @@ void ChatLLM::destroy()
     // The only time we should have a model loaded here is on shutdown
     // as we explicitly unload the model in all other circumstances
     if (isModelLoaded()) {
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
     }
 }
 
@@ -208,7 +214,7 @@ void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
     QString filePath = modelInfo.dirpath + modelInfo.filename();
     QFileInfo fileInfo(filePath);
 
-    m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
+    acquireModel();
 #if defined(DEBUG_MODEL_LOADING)
     qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
@@ -251,8 +257,6 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     // reset status
     emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
     emit modelLoadingError("");
-    emit reportFallbackReason("");
-    emit reportDevice("");
     m_pristineLoadedState = false;
 
     QString filePath = modelInfo.dirpath + modelInfo.filename();
@@ -265,12 +269,12 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #if defined(DEBUG_MODEL_LOADING)
         qDebug() << "already acquired model deleted" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
     } else if (!m_isServer) {
         // This is a blocking call that tries to retrieve the model we need from the model store.
        // If it succeeds, then we just have to restore state. If the store has never had a model
         // returned to it, then the modelInfo.model pointer should be null which will happen on startup
-        m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
+        acquireModel();
 #if defined(DEBUG_MODEL_LOADING)
         qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
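The comment above describes LLModelStore's blocking acquire. A minimal sketch of a single-slot blocking store with those assumed semantics (seeded with one empty LLModelInfo, so the first acquire returns immediately with a null model pointer; this is not the project's actual implementation):

    #include <condition_variable>
    #include <mutex>
    #include <optional>
    #include <utility>

    // Single-slot store: acquire() blocks until a value has been released.
    template <typename T>
    class SingleSlotStore {
    public:
        explicit SingleSlotStore(T initial) : m_slot(std::move(initial)) {}

        T acquire() {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_cv.wait(lock, [this] { return m_slot.has_value(); });
            T value = std::move(*m_slot);
            m_slot.reset();  // slot is now empty until the next release()
            return value;
        }

        void release(T value) {
            {
                std::lock_guard<std::mutex> lock(m_mutex);
                m_slot = std::move(value);
            }
            m_cv.notify_one();
        }

    private:
        std::mutex m_mutex;
        std::condition_variable m_cv;
        std::optional<T> m_slot;
    };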
@@ -305,7 +309,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #if defined(DEBUG_MODEL_LOADING)
             qDebug() << "deleting model" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
-            m_llModelInfo.model.reset();
+            m_llModelInfo.resetModel(this);
         }
     }
 
@@ -335,7 +339,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             model->setModelName(modelName);
             model->setRequestURL(modelInfo.url());
             model->setAPIKey(apiKey);
-            m_llModelInfo.model.reset(model);
+            m_llModelInfo.resetModel(this, model);
         } else {
             QElapsedTimer modelLoadTimer;
             modelLoadTimer.start();
@@ -360,10 +364,10 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #endif
 
             QString constructError;
-            m_llModelInfo.model.reset();
+            m_llModelInfo.resetModel(this);
             try {
                 auto *model = LLModel::Implementation::construct(filePath.toStdString(), backend, n_ctx);
-                m_llModelInfo.model.reset(model);
+                m_llModelInfo.resetModel(this, model);
             } catch (const LLModel::MissingImplementationError &e) {
                 modelLoadProps.insert("error", "missing_model_impl");
                 constructError = e.what();
@@ -412,14 +416,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     memGB = std::floor(memGB * 10.f) / 10.f; // truncate to 1 decimal place
                     modelLoadProps.insert("default_device", QString::fromStdString(defaultDevice->name));
                     modelLoadProps.insert("default_device_mem", approxDeviceMemGB(defaultDevice));
+                    modelLoadProps.insert("default_device_backend", QString::fromStdString(defaultDevice->backendName()));
                 }
             }
 
-            QString actualDevice("CPU");
+            bool actualDeviceIsCPU = true;
 
 #if defined(Q_OS_MAC) && defined(__aarch64__)
             if (m_llModelInfo.model->implementation().buildVariant() == "metal")
-                actualDevice = "Metal";
+                actualDeviceIsCPU = false;
 #else
             if (requestedDevice != "CPU") {
                 const auto *device = defaultDevice;
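The memGB line above truncates to one decimal place by scaling, flooring, and rescaling. A standalone check of the arithmetic (plain C++, no project code):

    #include <cmath>
    #include <cstdio>

    int main() {
        float memGB = 11.97f;                     // e.g. ~12 GB of device memory
        memGB = std::floor(memGB * 10.f) / 10.f;  // 119.7 -> 119.0 -> 11.9
        std::printf("%.1f GB\n", memGB);          // prints "11.9 GB"
    }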
@@ -437,41 +442,39 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 if (!device) {
                     // GPU not available
                 } else if (!m_llModelInfo.model->initializeGPUDevice(device->index, &unavail_reason)) {
-                    emit reportFallbackReason(QString::fromStdString("<br>" + unavail_reason));
+                    m_llModelInfo.fallbackReason = QString::fromStdString(unavail_reason);
                 } else {
-                    actualDevice = QString::fromStdString(device->reportedName());
+                    actualDeviceIsCPU = false;
                     modelLoadProps.insert("requested_device_mem", approxDeviceMemGB(device));
                 }
             }
 #endif
 
-            // Report which device we're actually using
-            emit reportDevice(actualDevice);
             bool success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, ngl);
 
             if (!m_shouldBeLoaded) {
-                m_llModelInfo.model.reset();
+                m_llModelInfo.resetModel(this);
                 if (!m_isServer)
                     LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                m_llModelInfo = LLModelInfo();
+                resetModel();
                 emit modelLoadingPercentageChanged(0.0f);
                 return false;
             }
 
-            if (actualDevice == "CPU") {
+            if (actualDeviceIsCPU) {
                 // we asked llama.cpp to use the CPU
             } else if (!success) {
                 // llama_init_from_file returned nullptr
-                emit reportDevice("CPU");
-                emit reportFallbackReason("<br>GPU loading failed (out of VRAM?)");
+                m_llModelInfo.fallbackReason = "GPU loading failed (out of VRAM?)";
                 modelLoadProps.insert("cpu_fallback_reason", "gpu_load_failed");
                 success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, 0);
 
                 if (!m_shouldBeLoaded) {
-                    m_llModelInfo.model.reset();
+                    m_llModelInfo.resetModel(this);
                     if (!m_isServer)
                         LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                    m_llModelInfo = LLModelInfo();
+                    resetModel();
                     emit modelLoadingPercentageChanged(0.0f);
                     return false;
                 }
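This hunk implements a two-stage load: try with the requested GPU layer count (ngl); if llama.cpp fails, record the fallback reason for the UI and telemetry, then retry with 0 GPU layers. A condensed sketch of that control flow (hypothetical Model interface, not the project's LLModel API):

    #include <string>

    struct Model {
        // Stub standing in for LLModel::loadModel(path, n_ctx, ngl).
        bool load(const std::string &path, int nCtx, int nGpuLayers) {
            (void)path; (void)nCtx;
            return nGpuLayers == 0;  // pretend only the CPU path succeeds
        }
    };

    // Try the GPU configuration first; on failure, note why and retry on CPU.
    bool loadWithCpuFallback(Model &m, const std::string &path, int nCtx, int ngl,
                             std::string &fallbackReason) {
        if (m.load(path, nCtx, ngl))
            return true;
        fallbackReason = "GPU loading failed (out of VRAM?)";
        return m.load(path, nCtx, /*nGpuLayers=*/0);
    }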
@@ -479,16 +482,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 // ggml_vk_init was not called in llama.cpp
                 // We might have had to fallback to CPU after load if the model is not possible to accelerate
                 // for instance if the quantization method is not supported on Vulkan yet
-                emit reportDevice("CPU");
-                emit reportFallbackReason("<br>model or quant has no GPU support");
+                m_llModelInfo.fallbackReason = "model or quant has no GPU support";
                 modelLoadProps.insert("cpu_fallback_reason", "gpu_unsupported_model");
             }
 
             if (!success) {
-                m_llModelInfo.model.reset();
+                m_llModelInfo.resetModel(this);
                 if (!m_isServer)
                     LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                m_llModelInfo = LLModelInfo();
+                resetModel();
                 emit modelLoadingError(u"Could not load model due to invalid model file for %1"_s.arg(modelInfo.filename()));
                 modelLoadProps.insert("error", "loadmodel_failed");
             } else {
@@ -497,10 +499,10 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 case 'G': m_llModelType = LLModelType::GPTJ_; break;
                 default:
                     {
-                        m_llModelInfo.model.reset();
+                        m_llModelInfo.resetModel(this);
                         if (!m_isServer)
                             LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                        m_llModelInfo = LLModelInfo();
+                        resetModel();
                         emit modelLoadingError(u"Could not determine model type for %1"_s.arg(modelInfo.filename()));
                     }
                 }
@@ -510,7 +512,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         } else {
             if (!m_isServer)
                 LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-            m_llModelInfo = LLModelInfo();
+            resetModel();
             emit modelLoadingError(u"Error loading %1: %2"_s.arg(modelInfo.filename(), constructError));
         }
     }
@@ -523,6 +525,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         fflush(stdout);
 #endif
         emit modelLoadingPercentageChanged(isModelLoaded() ? 1.0f : 0.0f);
+        emit loadedModelInfoChanged();
 
         modelLoadProps.insert("requestedDevice", MySettings::globalInstance()->device());
         modelLoadProps.insert("model", modelInfo.filename());
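modelLoadProps accumulates the key/value pairs attached to the Mixpanel model-load event. A sketch of assembling such a payload (keys taken from the hunks above; the QVariantMap container and the values are assumptions):

    #include <QString>
    #include <QVariantMap>

    // Sketch: the kind of telemetry payload the load path builds up.
    QVariantMap exampleLoadProps() {
        QVariantMap props;
        props.insert("requestedDevice", QStringLiteral("Auto"));           // user's device setting
        props.insert("default_device_backend", QStringLiteral("vulkan"));  // chosen GPU backend
        props.insert("cpu_fallback_reason", QStringLiteral("gpu_load_failed"));
        return props;
    }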
@@ -530,7 +533,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     } else {
         if (!m_isServer)
             LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo)); // release back into the store
-        m_llModelInfo = LLModelInfo();
+        resetModel();
         emit modelLoadingError(u"Could not find file for model %1"_s.arg(modelInfo.filename()));
     }
 
@@ -621,6 +624,16 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
     emit modelInfoChanged(modelInfo);
 }
 
+void ChatLLM::acquireModel() {
+    m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
+    emit loadedModelInfoChanged();
+}
+
+void ChatLLM::resetModel() {
+    m_llModelInfo = {};
+    emit loadedModelInfoChanged();
+}
+
 void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
 {
     m_shouldBeLoaded = true;
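These two helpers centralize what were previously bare assignments: acquireModel() wraps m_llModelInfo = LLModelStore::globalInstance()->acquireModel(), and resetModel() wraps m_llModelInfo = LLModelInfo(), each emitting loadedModelInfoChanged() so QML bindings re-evaluate. resetModel() matters even immediately after releaseModel(std::move(m_llModelInfo)): the move leaves the member in a moved-from state, and the helper both restores a fresh LLModelInfo and notifies the UI. The teardown sequence used throughout loadModel()'s error paths looks like this (assembled from the hunks above, not new code):

    m_llModelInfo.resetModel(this);  // drop the model, clear fallbackReason, notify UI
    if (!m_isServer)
        LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
    resetModel();                    // fresh LLModelInfo, notify UI again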
@@ -809,7 +822,7 @@ void ChatLLM::unloadModel()
 #endif
 
     if (m_forceUnloadModel) {
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
         m_forceUnloadModel = false;
     }
 