mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-08-04 09:32:29 +00:00)
commit adaecb7a72 (parent 27c561aeb7)

    mixpanel: improved GPU device statistics (plus GPU sort order fix) (#2297)

    Signed-off-by: Jared Van Bortel <jared@nomic.ai>

Submodule update:

@@ -1 +1 @@
-Subproject commit e3c4f65d786d26f1daa7aebfb1b67cd6c31ea082
+Subproject commit a3f03b7e793ee611c4918235d4532ee535a9530d

New standard-library includes for the code added below:

@@ -7,6 +7,16 @@
 #include "mysettings.h"
 #include "../gpt4all-backend/llmodel.h"
 
+#include <algorithm>
+#include <cctype>
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
 #include <QElapsedTimer>
 
 //#define DEBUG

The main change, in ChatLLM::loadModel: record the default GPU device and its approximate memory before picking the device to actually use.

@@ -344,20 +354,36 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 return m_shouldBeLoaded;
             });
 
-            // Pick the best match for the device
-            QString actualDevice = m_llModelInfo.model->implementation().buildVariant() == "metal" ? "Metal" : "CPU";
-            const QString requestedDevice = MySettings::globalInstance()->device();
-            if (requestedDevice == "CPU") {
-                emit reportFallbackReason(""); // fallback not applicable
-            } else {
-                const size_t requiredMemory = m_llModelInfo.model->requiredMem(filePath.toStdString(), n_ctx, ngl);
-                std::vector<LLModel::GPUDevice> availableDevices = m_llModelInfo.model->availableGPUDevices(requiredMemory);
-                LLModel::GPUDevice *device = nullptr;
-
-                if (!availableDevices.empty() && requestedDevice == "Auto" && availableDevices.front().type == 2 /*a discrete gpu*/) {
-                    device = &availableDevices.front();
-                } else {
-                    for (LLModel::GPUDevice &d : availableDevices) {
+            emit reportFallbackReason(""); // no fallback yet
+
+            auto approxDeviceMemGB = [](const LLModel::GPUDevice *dev) {
+                float memGB = dev->heapSize / float(1024 * 1024 * 1024);
+                return std::floor(memGB * 10.f) / 10.f; // truncate to 1 decimal place
+            };
+
+            std::vector<LLModel::GPUDevice> availableDevices;
+            const LLModel::GPUDevice *defaultDevice = nullptr;
+            {
+                const size_t requiredMemory = m_llModelInfo.model->requiredMem(filePath.toStdString(), n_ctx, ngl);
+                availableDevices = m_llModelInfo.model->availableGPUDevices(requiredMemory);
+                if (!availableDevices.empty() && availableDevices.front().type == 2 /*a discrete gpu*/) {
+                    defaultDevice = &availableDevices.front();
+                    float memGB = defaultDevice->heapSize / float(1024 * 1024 * 1024);
+                    memGB = std::floor(memGB * 10.f) / 10.f; // truncate to 1 decimal place
+                    modelLoadProps.insert("default_device", QString::fromStdString(defaultDevice->name));
+                    modelLoadProps.insert("default_device_mem", approxDeviceMemGB(defaultDevice));
+                }
+            }
+
+            const QString requestedDevice = MySettings::globalInstance()->device();
+            bool isMetal = m_llModelInfo.model->implementation().buildVariant() == "metal";
+
+            // Pick the best match for the device
+            QString actualDevice = isMetal ? "Metal" : "CPU";
+            if (!isMetal && requestedDevice != "CPU") {
+                const auto *device = defaultDevice;
+                if (requestedDevice != "Auto") {
+                    for (const LLModel::GPUDevice &d : availableDevices) {
                         if (QString::fromStdString(d.name) == requestedDevice) {
                             device = &d;
                             break;
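
A note on the memory figure: approxDeviceMemGB converts the device heap size from bytes to GiB and then truncates (rather than rounds) to one decimal place, so the reported number never overstates the device's memory. A standalone sketch of that arithmetic, with a made-up heap size:

    #include <cmath>
    #include <cstddef>
    #include <cstdio>

    // Same arithmetic as the approxDeviceMemGB lambda above:
    // bytes -> GiB, truncated (not rounded) to one decimal place.
    static float approxDeviceMemGB(std::size_t heapSize)
    {
        float memGB = heapSize / float(1024 * 1024 * 1024);
        return std::floor(memGB * 10.f) / 10.f;
    }

    int main()
    {
        // A hypothetical heap of about 7.96 GiB is reported as 7.9, never 8.0.
        std::printf("%.1f\n", approxDeviceMemGB(8546529280ULL));
        return 0;
    }
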
The "no fallback yet" signal moved above the selection block, so it is emitted exactly once per attempt:

@@ -365,7 +391,6 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     }
                 }
 
-                emit reportFallbackReason(""); // no fallback yet
                 std::string unavail_reason;
                 if (!device) {
                     // GPU not available
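
Taken together, the hunks above restructure device selection: the default is the first listed device when it is a discrete GPU (type == 2), which is why the backend's GPU sort order matters; a request other than "Auto" overrides the default by name; and a null result falls through to the CPU path with a reported reason. A condensed sketch of that flow, using a simplified stand-in for LLModel::GPUDevice (the real code also skips this entirely for Metal builds and an explicit "CPU" request):

    #include <string>
    #include <vector>

    // Simplified stand-in for LLModel::GPUDevice; type == 2 means discrete GPU.
    struct GPUDevice { std::string name; int type; };

    // Mirrors the selection order above: default to the first discrete device,
    // then let an explicit (non-"Auto") request override it by name. A null
    // result means no usable GPU, i.e. the CPU fallback path.
    const GPUDevice *pickDevice(const std::vector<GPUDevice> &devices,
                                const std::string &requested)
    {
        const GPUDevice *device = nullptr;
        if (!devices.empty() && devices.front().type == 2)
            device = &devices.front();
        if (requested != "Auto") {
            for (const GPUDevice &d : devices) {
                if (d.name == requested) {
                    device = &d;
                    break;
                }
            }
        }
        return device;
    }
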
Finally, when a GPU is actually used, its approximate memory is recorded as well:

@@ -373,6 +398,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     emit reportFallbackReason(QString::fromStdString("<br>" + unavail_reason));
                 } else {
                     actualDevice = QString::fromStdString(device->name);
+                    modelLoadProps.insert("requested_device_mem", approxDeviceMemGB(device));
                 }
             }
 
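
Net effect on analytics: up to three new properties ride along with the model-load event. The property names below come from the diff; the values are invented for illustration, and the real code inserts them into modelLoadProps (a Qt variant map) rather than the std::map standing in here:

    #include <cstdio>
    #include <map>
    #include <string>

    int main()
    {
        // Illustrative values only; keys match the diff above.
        std::map<std::string, std::string> props {
            {"default_device",       "Fake GPU 0"}, // first discrete GPU found
            {"default_device_mem",   "11.9"},       // its memory, truncated to 0.1
            {"requested_device_mem", "11.9"},       // set only when a GPU is used
        };
        for (const auto &[key, value] : props)
            std::printf("%s = %s\n", key.c_str(), value.c_str());
        return 0;
    }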