mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-08-04 09:32:29 +00:00)
commit adaecb7a72 (parent 27c561aeb7)

    mixpanel: improved GPU device statistics (plus GPU sort order fix) (#2297)

    Signed-off-by: Jared Van Bortel <jared@nomic.ai>

Submodule update:

@@ -1 +1 @@
-Subproject commit e3c4f65d786d26f1daa7aebfb1b67cd6c31ea082
+Subproject commit a3f03b7e793ee611c4918235d4532ee535a9530d

New standard-library includes for the code added below:

@@ -7,6 +7,16 @@
 #include "mysettings.h"
 #include "../gpt4all-backend/llmodel.h"
 
+#include <algorithm>
+#include <cctype>
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
 #include <QElapsedTimer>
 
 //#define DEBUG

The main change, in ChatLLM::loadModel: record the default GPU device and its approximate memory before picking the device to actually use.

@@ -344,20 +354,36 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 return m_shouldBeLoaded;
             });
 
-            // Pick the best match for the device
-            QString actualDevice = m_llModelInfo.model->implementation().buildVariant() == "metal" ? "Metal" : "CPU";
-            const QString requestedDevice = MySettings::globalInstance()->device();
-            if (requestedDevice == "CPU") {
-                emit reportFallbackReason(""); // fallback not applicable
-            } else {
-                const size_t requiredMemory = m_llModelInfo.model->requiredMem(filePath.toStdString(), n_ctx, ngl);
-                std::vector<LLModel::GPUDevice> availableDevices = m_llModelInfo.model->availableGPUDevices(requiredMemory);
-                LLModel::GPUDevice *device = nullptr;
-
-                if (!availableDevices.empty() && requestedDevice == "Auto" && availableDevices.front().type == 2 /*a discrete gpu*/) {
-                    device = &availableDevices.front();
-                } else {
-                    for (LLModel::GPUDevice &d : availableDevices) {
+            emit reportFallbackReason(""); // no fallback yet
+
+            auto approxDeviceMemGB = [](const LLModel::GPUDevice *dev) {
+                float memGB = dev->heapSize / float(1024 * 1024 * 1024);
+                return std::floor(memGB * 10.f) / 10.f; // truncate to 1 decimal place
+            };
+
+            std::vector<LLModel::GPUDevice> availableDevices;
+            const LLModel::GPUDevice *defaultDevice = nullptr;
+            {
+                const size_t requiredMemory = m_llModelInfo.model->requiredMem(filePath.toStdString(), n_ctx, ngl);
+                availableDevices = m_llModelInfo.model->availableGPUDevices(requiredMemory);
+                if (!availableDevices.empty() && availableDevices.front().type == 2 /*a discrete gpu*/) {
+                    defaultDevice = &availableDevices.front();
+                    float memGB = defaultDevice->heapSize / float(1024 * 1024 * 1024);
+                    memGB = std::floor(memGB * 10.f) / 10.f; // truncate to 1 decimal place
+                    modelLoadProps.insert("default_device", QString::fromStdString(defaultDevice->name));
+                    modelLoadProps.insert("default_device_mem", approxDeviceMemGB(defaultDevice));
+                }
+            }
+
+            const QString requestedDevice = MySettings::globalInstance()->device();
+            bool isMetal = m_llModelInfo.model->implementation().buildVariant() == "metal";
+
+            // Pick the best match for the device
+            QString actualDevice = isMetal ? "Metal" : "CPU";
+            if (!isMetal && requestedDevice != "CPU") {
+                const auto *device = defaultDevice;
+                if (requestedDevice != "Auto") {
+                    for (const LLModel::GPUDevice &d : availableDevices) {
                         if (QString::fromStdString(d.name) == requestedDevice) {
                             device = &d;
                             break;
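
A note on the memory figure: approxDeviceMemGB converts the device heap size from bytes to GiB and then truncates (rather than rounds) to one decimal place, so the reported number never overstates the device's memory. A standalone sketch of that arithmetic, with a made-up heap size:

    #include <cmath>
    #include <cstddef>
    #include <cstdio>

    // Same arithmetic as the approxDeviceMemGB lambda above:
    // bytes -> GiB, truncated (not rounded) to one decimal place.
    static float approxDeviceMemGB(std::size_t heapSize)
    {
        float memGB = heapSize / float(1024 * 1024 * 1024);
        return std::floor(memGB * 10.f) / 10.f;
    }

    int main()
    {
        // A hypothetical heap of about 7.96 GiB is reported as 7.9, never 8.0.
        std::printf("%.1f\n", approxDeviceMemGB(8546529280ULL));
        return 0;
    }
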
The "no fallback yet" signal moved above the selection block, so it is emitted exactly once per attempt:

@@ -365,7 +391,6 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     }
                 }
 
-                emit reportFallbackReason(""); // no fallback yet
                 std::string unavail_reason;
                 if (!device) {
                     // GPU not available
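
Taken together, the hunks above restructure device selection: the default is the first listed device when it is a discrete GPU (type == 2), which is why the backend's GPU sort order matters; a request other than "Auto" overrides the default by name; and a null result falls through to the CPU path with a reported reason. A condensed sketch of that flow, using a simplified stand-in for LLModel::GPUDevice (the real code also skips this entirely for Metal builds and an explicit "CPU" request):

    #include <string>
    #include <vector>

    // Simplified stand-in for LLModel::GPUDevice; type == 2 means discrete GPU.
    struct GPUDevice { std::string name; int type; };

    // Mirrors the selection order above: default to the first discrete device,
    // then let an explicit (non-"Auto") request override it by name. A null
    // result means no usable GPU, i.e. the CPU fallback path.
    const GPUDevice *pickDevice(const std::vector<GPUDevice> &devices,
                                const std::string &requested)
    {
        const GPUDevice *device = nullptr;
        if (!devices.empty() && devices.front().type == 2)
            device = &devices.front();
        if (requested != "Auto") {
            for (const GPUDevice &d : devices) {
                if (d.name == requested) {
                    device = &d;
                    break;
                }
            }
        }
        return device;
    }
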
Finally, when a GPU is actually used, its approximate memory is recorded as well:

@@ -373,6 +398,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     emit reportFallbackReason(QString::fromStdString("<br>" + unavail_reason));
                 } else {
                     actualDevice = QString::fromStdString(device->name);
+                    modelLoadProps.insert("requested_device_mem", approxDeviceMemGB(device));
                 }
             }
 
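
Net effect on analytics: up to three new properties ride along with the model-load event. The property names below come from the diff; the values are invented for illustration, and the real code inserts them into modelLoadProps (a Qt variant map) rather than the std::map standing in here:

    #include <cstdio>
    #include <map>
    #include <string>

    int main()
    {
        // Illustrative values only; keys match the diff above.
        std::map<std::string, std::string> props {
            {"default_device",       "Fake GPU 0"}, // first discrete GPU found
            {"default_device_mem",   "11.9"},       // its memory, truncated to 0.1
            {"requested_device_mem", "11.9"},       // set only when a GPU is used
        };
        for (const auto &[key, value] : props)
            std::printf("%s = %s\n", key.c_str(), value.c_str());
        return 0;
    }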