Mirror of https://github.com/nomic-ai/gpt4all.git
python: add list_gpus to the GPT4All API (#2194)
Other changes:
* fix memory leak in llmodel_available_gpu_devices
* drop model argument from llmodel_available_gpu_devices
* breaking: make GPT4All/Embed4All arguments past model_name keyword-only

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
@@ -213,9 +213,9 @@ LLModel *LLModel::Implementation::constructDefaultLlama() {
     return llama.get();
 }
 
-std::vector<LLModel::GPUDevice> LLModel::Implementation::availableGPUDevices() {
+std::vector<LLModel::GPUDevice> LLModel::Implementation::availableGPUDevices(size_t memoryRequired) {
     auto *llama = constructDefaultLlama();
-    if (llama) { return llama->availableGPUDevices(0); }
+    if (llama) { return llama->availableGPUDevices(memoryRequired); }
     return {};
 }
@@ -38,7 +38,7 @@ public:
     std::string_view buildVariant() const { return m_buildVariant; }
 
     static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto", int n_ctx = 2048);
-    static std::vector<GPUDevice> availableGPUDevices();
+    static std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired = 0);
     static int32_t maxContextLength(const std::string &modelPath);
     static int32_t layerCount(const std::string &modelPath);
     static bool isEmbeddingModel(const std::string &modelPath);
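Taken together, these two hunks move GPU enumeration onto LLModel::Implementation and thread a memoryRequired filter through it, while the default argument keeps existing zero-argument call sites compiling. Below is a minimal caller-side sketch of both call forms; it is not part of the commit, and the "llmodel.h" include path plus the main() harness are illustrative assumptions.

// Hypothetical caller (illustration only, not part of this commit).
#include <cstddef>
#include <iostream>

#include "llmodel.h"  // assumed include path for the LLModel declarations above

int main() {
    // Old call form still compiles: memoryRequired defaults to 0 (no filtering).
    auto all = LLModel::Implementation::availableGPUDevices();
    std::cout << "devices (no VRAM requirement): " << all.size() << "\n";

    // New call form: only devices that can provide at least 4 GiB of VRAM.
    const size_t minVram = size_t(4) * 1024 * 1024 * 1024;
    for (const auto &dev : LLModel::Implementation::availableGPUDevices(minVram))
        std::cout << dev.index << ": " << dev.name << " (" << dev.heapSize << " bytes)\n";
    return 0;
}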
@@ -4,6 +4,7 @@
 #include <cerrno>
 #include <cstring>
 #include <iostream>
+#include <memory>
 #include <optional>
 #include <utility>
@@ -221,28 +222,45 @@ const char *llmodel_get_implementation_search_path()
     return LLModel::Implementation::implementationsSearchPath().c_str();
 }
 
-struct llmodel_gpu_device* llmodel_available_gpu_devices(llmodel_model model, size_t memoryRequired, int* num_devices)
+// RAII wrapper around a C-style struct
+struct llmodel_gpu_device_cpp: llmodel_gpu_device {
+    llmodel_gpu_device_cpp() = default;
+
+    llmodel_gpu_device_cpp(const llmodel_gpu_device_cpp &) = delete;
+    llmodel_gpu_device_cpp(      llmodel_gpu_device_cpp &&) = delete;
+
+    const llmodel_gpu_device_cpp &operator=(const llmodel_gpu_device_cpp &) = delete;
+          llmodel_gpu_device_cpp &operator=(      llmodel_gpu_device_cpp &&) = delete;
+
+    ~llmodel_gpu_device_cpp() {
+        free(const_cast<char *>(name));
+        free(const_cast<char *>(vendor));
+    }
+};
+
+static_assert(sizeof(llmodel_gpu_device_cpp) == sizeof(llmodel_gpu_device));
+
+struct llmodel_gpu_device *llmodel_available_gpu_devices(size_t memoryRequired, int *num_devices)
 {
-    auto *wrapper = static_cast<LLModelWrapper *>(model);
-    std::vector<LLModel::GPUDevice> devices = wrapper->llModel->availableGPUDevices(memoryRequired);
+    static thread_local std::unique_ptr<llmodel_gpu_device_cpp[]> c_devices;
 
-    // Set the num_devices
+    auto devices = LLModel::Implementation::availableGPUDevices(memoryRequired);
     *num_devices = devices.size();
 
-    if (*num_devices == 0) return nullptr;  // Return nullptr if no devices are found
+    if (devices.empty()) { return nullptr; /* no devices */ }
 
-    // Allocate memory for the output array
-    struct llmodel_gpu_device* output = (struct llmodel_gpu_device*) malloc(*num_devices * sizeof(struct llmodel_gpu_device));
-
-    for (int i = 0; i < *num_devices; i++) {
-        output[i].index = devices[i].index;
-        output[i].type = devices[i].type;
-        output[i].heapSize = devices[i].heapSize;
-        output[i].name = strdup(devices[i].name.c_str()); // Convert std::string to char* and allocate memory
-        output[i].vendor = strdup(devices[i].vendor.c_str()); // Convert std::string to char* and allocate memory
+    c_devices = std::make_unique<llmodel_gpu_device_cpp[]>(devices.size());
+    for (unsigned i = 0; i < devices.size(); i++) {
+        const auto &dev = devices[i];
+        auto &cdev = c_devices[i];
+        cdev.index = dev.index;
+        cdev.type = dev.type;
+        cdev.heapSize = dev.heapSize;
+        cdev.name = strdup(dev.name.c_str());
+        cdev.vendor = strdup(dev.vendor.c_str());
     }
 
-    return output;
+    return c_devices.get();
 }
 
 bool llmodel_gpu_init_gpu_device_by_string(llmodel_model model, size_t memoryRequired, const char *device)
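This rewrite is the memory-leak fix noted in the commit message: the old code malloc()'d a fresh output array and strdup()'d the name/vendor strings on every call and never freed them, while the new code keeps a single thread_local array of RAII wrappers whose destructors free those strings when the array is replaced by the next call (or at thread exit). A hedged sketch of calling the revised C entry point follows; the "llmodel_c.h" include path is an assumption.

// Hypothetical caller of the revised C API (illustration only).
// The returned array is owned by the library's thread_local buffer: do not
// free() it, and treat it as valid only until the next call on this thread.
#include <cstdio>

#include "llmodel_c.h"  // assumed include path for the C declarations below

int main() {
    int num_devices = 0;
    struct llmodel_gpu_device *devs = llmodel_available_gpu_devices(0 /* memoryRequired */, &num_devices);
    for (int i = 0; i < num_devices; i++)
        std::printf("%d: %s (%s), heap %zu bytes\n",
                    devs[i].index, devs[i].name, devs[i].vendor, devs[i].heapSize);
    return 0;
}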
@@ -48,9 +48,9 @@ struct llmodel_prompt_context {
 };
 
 struct llmodel_gpu_device {
-    int index = 0;
-    int type = 0;           // same as VkPhysicalDeviceType
-    size_t heapSize = 0;
+    int index;
+    int type;               // same as VkPhysicalDeviceType
+    size_t heapSize;
     const char * name;
     const char * vendor;
 };
@@ -241,9 +241,10 @@ const char *llmodel_get_implementation_search_path();
 
 /**
  * Get a list of available GPU devices given the memory required.
+ * @param memoryRequired The minimum amount of VRAM, in bytes
  * @return A pointer to an array of llmodel_gpu_device's whose number is given by num_devices.
  */
-struct llmodel_gpu_device* llmodel_available_gpu_devices(llmodel_model model, size_t memoryRequired, int* num_devices);
+struct llmodel_gpu_device* llmodel_available_gpu_devices(size_t memoryRequired, int* num_devices);
 
 /**
  * Initializes a GPU device based on a specified string criterion.
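One consequence of the thread_local buffer is worth keeping in mind next to this documentation: the strings in the returned array are freed as soon as the array is rebuilt by a subsequent call, so a caller that wants to keep device information around should copy it into owning types first. A small sketch of that pattern follows, under the same assumed include path; the GpuInfo struct and snapshotGpus helper are hypothetical names, not part of the library.

// Copy the device list into owning C++ types so the data outlives the
// library's thread_local buffer, which is reused on the next call.
#include <cstddef>
#include <string>
#include <vector>

#include "llmodel_c.h"  // assumed include path

struct GpuInfo {
    int index;
    std::string name;
    std::string vendor;
    size_t heapSize;
};

std::vector<GpuInfo> snapshotGpus(size_t memoryRequired) {
    int n = 0;
    const struct llmodel_gpu_device *devs = llmodel_available_gpu_devices(memoryRequired, &n);
    std::vector<GpuInfo> out;
    for (int i = 0; i < n; i++)
        out.push_back({devs[i].index, devs[i].name, devs[i].vendor, devs[i].heapSize});
    return out;  // still valid after llmodel_available_gpu_devices is called again
}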