python: add list_gpus to the GPT4All API (#2194)

Other changes:
* fix memory leak in llmodel_available_gpu_devices
* drop model argument from llmodel_available_gpu_devices
* breaking: make GPT4All/Embed4All arguments past model_name keyword-only

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel <jared@nomic.ai>
Date:   2024-04-04 14:52:13 -04:00 (committed by GitHub)
Parent: 790320e170    Commit: 1b84a48c47
8 changed files with 91 additions and 58 deletions

--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp

@@ -213,9 +213,9 @@ LLModel *LLModel::Implementation::constructDefaultLlama() {
     return llama.get();
 }
 
-std::vector<LLModel::GPUDevice> LLModel::Implementation::availableGPUDevices() {
+std::vector<LLModel::GPUDevice> LLModel::Implementation::availableGPUDevices(size_t memoryRequired) {
     auto *llama = constructDefaultLlama();
-    if (llama) { return llama->availableGPUDevices(0); }
+    if (llama) { return llama->availableGPUDevices(memoryRequired); }
     return {};
 }

--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/llmodel.h

@@ -38,7 +38,7 @@ public:
     std::string_view buildVariant() const { return m_buildVariant; }
 
     static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto", int n_ctx = 2048);
-    static std::vector<GPUDevice> availableGPUDevices();
+    static std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired = 0);
     static int32_t maxContextLength(const std::string &modelPath);
     static int32_t layerCount(const std::string &modelPath);
     static bool isEmbeddingModel(const std::string &modelPath);
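
These two hunks are the C++ side of the change: the static availableGPUDevices entry point gains a memoryRequired parameter (defaulting to 0, i.e. no filter) and forwards it to the default llama implementation instead of hardcoding 0. A minimal caller sketch, hypothetical and not part of this commit, assuming only the declarations shown above:

// Hypothetical caller, not part of this commit: enumerate GPUs that have at
// least 512 MiB of VRAM via the new overload (assumes llmodel.h above).
#include "llmodel.h"

#include <cstdio>

int main() {
    size_t memoryRequired = 512 * 1024 * 1024; // bytes; 0 lists every device
    auto devices = LLModel::Implementation::availableGPUDevices(memoryRequired);
    for (const auto &dev : devices) {
        std::printf("%d: %s (%s), heap %zu bytes\n",
                    dev.index, dev.name.c_str(), dev.vendor.c_str(), dev.heapSize);
    }
}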

--- a/gpt4all-backend/llmodel_c.cpp
+++ b/gpt4all-backend/llmodel_c.cpp

@@ -4,6 +4,7 @@
 #include <cerrno>
 #include <cstring>
 #include <iostream>
+#include <memory>
 #include <optional>
 #include <utility>

@@ -221,28 +222,45 @@ const char *llmodel_get_implementation_search_path()
     return LLModel::Implementation::implementationsSearchPath().c_str();
 }
 
-struct llmodel_gpu_device* llmodel_available_gpu_devices(llmodel_model model, size_t memoryRequired, int* num_devices)
-{
-    auto *wrapper = static_cast<LLModelWrapper *>(model);
-    std::vector<LLModel::GPUDevice> devices = wrapper->llModel->availableGPUDevices(memoryRequired);
-
-    // Set the num_devices
+// RAII wrapper around a C-style struct
+struct llmodel_gpu_device_cpp: llmodel_gpu_device {
+    llmodel_gpu_device_cpp() = default;
+
+    llmodel_gpu_device_cpp(const llmodel_gpu_device_cpp &) = delete;
+    llmodel_gpu_device_cpp(      llmodel_gpu_device_cpp &&) = delete;
+
+    const llmodel_gpu_device_cpp &operator=(const llmodel_gpu_device_cpp &) = delete;
+          llmodel_gpu_device_cpp &operator=(      llmodel_gpu_device_cpp &&) = delete;
+
+    ~llmodel_gpu_device_cpp() {
+        free(const_cast<char *>(name));
+        free(const_cast<char *>(vendor));
+    }
+};
+
+static_assert(sizeof(llmodel_gpu_device_cpp) == sizeof(llmodel_gpu_device));
+
+struct llmodel_gpu_device *llmodel_available_gpu_devices(size_t memoryRequired, int *num_devices)
+{
+    static thread_local std::unique_ptr<llmodel_gpu_device_cpp[]> c_devices;
+
+    auto devices = LLModel::Implementation::availableGPUDevices(memoryRequired);
     *num_devices = devices.size();
-    if (*num_devices == 0) return nullptr;  // Return nullptr if no devices are found
-
-    // Allocate memory for the output array
-    struct llmodel_gpu_device* output = (struct llmodel_gpu_device*) malloc(*num_devices * sizeof(struct llmodel_gpu_device));
-
-    for (int i = 0; i < *num_devices; i++) {
-        output[i].index = devices[i].index;
-        output[i].type = devices[i].type;
-        output[i].heapSize = devices[i].heapSize;
-        output[i].name = strdup(devices[i].name.c_str());  // Convert std::string to char* and allocate memory
-        output[i].vendor = strdup(devices[i].vendor.c_str());  // Convert std::string to char* and allocate memory
+    if (devices.empty()) { return nullptr; /* no devices */ }
+
+    c_devices = std::make_unique<llmodel_gpu_device_cpp[]>(devices.size());
+    for (unsigned i = 0; i < devices.size(); i++) {
+        const auto &dev = devices[i];
+        auto &cdev = c_devices[i];
+        cdev.index = dev.index;
+        cdev.type = dev.type;
+        cdev.heapSize = dev.heapSize;
+        cdev.name = strdup(dev.name.c_str());
+        cdev.vendor = strdup(dev.vendor.c_str());
     }
-    return output;
+
+    return c_devices.get();
 }
 
 bool llmodel_gpu_init_gpu_device_by_string(llmodel_model model, size_t memoryRequired, const char *device)
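
This rewrite is the leak fix named in the commit message: the old version malloc'd a fresh array plus two strdup'd strings per device on every call and never freed any of it. The new version keeps a single thread_local array of llmodel_gpu_device_cpp wrappers; reassigning c_devices on the next call (or thread exit) runs the wrappers' destructors, which free the strdup'd name and vendor strings. The static_assert guarantees the wrapper adds nothing to the C struct's size, so the array can be handed back through the C ABI unchanged. The flip side is an ownership change for callers: the returned array is borrowed, not owned. A hedged caller sketch, hypothetical and not part of this commit, assuming the llmodel_c.h declarations shown in this diff:

// Hypothetical caller, not part of this commit: using the reshaped C entry
// point from C++ (assumes llmodel_c.h below).
#include "llmodel_c.h"

#include <cstdio>

int main() {
    int num_devices = 0;
    // memoryRequired = 0 lists every device. The returned array is owned by
    // the library (a thread_local buffer): do not free() it, and copy out
    // anything you need before the next call from this thread replaces it.
    llmodel_gpu_device *devices = llmodel_available_gpu_devices(0, &num_devices);
    for (int i = 0; i < num_devices; i++) {
        std::printf("%d: %s (%s), heap %zu bytes\n",
                    devices[i].index, devices[i].name, devices[i].vendor,
                    devices[i].heapSize);
    }
}

Returning a pointer into a thread_local buffer trades re-entrancy for leak-freedom: each thread gets its own array, but a second call from the same thread invalidates the first result.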

--- a/gpt4all-backend/llmodel_c.h
+++ b/gpt4all-backend/llmodel_c.h

@@ -48,9 +48,9 @@ struct llmodel_prompt_context {
 };
 
 struct llmodel_gpu_device {
-    int index = 0;
-    int type = 0;           // same as VkPhysicalDeviceType
-    size_t heapSize = 0;
+    int index;
+    int type;               // same as VkPhysicalDeviceType
+    size_t heapSize;
     const char * name;
     const char * vendor;
 };
@@ -241,9 +241,10 @@ const char *llmodel_get_implementation_search_path();
 
 /**
  * Get a list of available GPU devices given the memory required.
+ * @param memoryRequired The minimum amount of VRAM, in bytes
  * @return A pointer to an array of llmodel_gpu_device's whose number is given by num_devices.
  */
-struct llmodel_gpu_device* llmodel_available_gpu_devices(llmodel_model model, size_t memoryRequired, int* num_devices);
+struct llmodel_gpu_device* llmodel_available_gpu_devices(size_t memoryRequired, int* num_devices);
 
 /**
  * Initializes a GPU device based on a specified string criterion.