Nomic Vulkan backend licensed under the Software for Open Models License (SOM), version 1.0.

Adam Treat
2023-08-30 09:43:56 -04:00
parent d55cbbee32
commit 987546c63b
13 changed files with 512 additions and 5 deletions

View File

@@ -66,6 +66,7 @@ class GPT4All:
        model_type: Optional[str] = None,
        allow_download: bool = True,
        n_threads: Optional[int] = None,
        device: Optional[str] = "cpu",
    ):
        """
        Constructor
@@ -78,11 +79,22 @@ class GPT4All:
                descriptive identifier for user. Default is None.
            allow_download: Allow API to download models from gpt4all.io. Default is True.
            n_threads: number of CPU threads used by GPT4All. Default is None, in which case the number of threads is determined automatically.
            device: The processing unit on which the GPT4All model will run. It can be set to:
                - "cpu": Model will run on the central processing unit.
                - "gpu": Model will run on the best available graphics processing unit, irrespective of its vendor.
                - "amd", "nvidia", "intel": Model will run on the best available GPU from the specified vendor.
                Alternatively, a specific GPU name can be provided, and the model will run on the GPU that matches the name if it is available.
                Default is "cpu".
                Note: If the selected GPU device does not have sufficient RAM to accommodate the model, an error will be thrown, and the GPT4All instance will be rendered invalid. It is advised to ensure the device has enough memory before initializing the model.
        """
        self.model_type = model_type
        self.model = pyllmodel.LLModel()
        # Retrieve model and download if allowed
        self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download)
        if device is not None:
            if device != "cpu":
                self.model.init_gpu(model_path=self.config["path"], device=device)
        self.model.load_model(self.config["path"])
        # Set n_threads
        if n_threads is not None:
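
A short usage sketch for the new device parameter (the model filename is illustrative; any model from gpt4all.io works):

    from gpt4all import GPT4All

    # Request the best available GPU; the constructor initializes the device
    # before load_model() runs on the same path.
    model = GPT4All("ggml-model-gpt4all-falcon-q4_0.bin", device="gpu")

    # Vendor selection and CPU execution follow the same pattern:
    # GPT4All("ggml-model-gpt4all-falcon-q4_0.bin", device="nvidia")
    # GPT4All("ggml-model-gpt4all-falcon-q4_0.bin", device="cpu")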

View File

@@ -70,6 +70,14 @@ class LLModelPromptContext(ctypes.Structure):
("context_erase", ctypes.c_float),
]
class LLModelGPUDevice(ctypes.Structure):
    _fields_ = [
        ("index", ctypes.c_int32),
        ("type", ctypes.c_int32),
        ("heapSize", ctypes.c_size_t),
        ("name", ctypes.c_char_p),
        ("vendor", ctypes.c_char_p),
    ]
# Define C function signatures using ctypes
llmodel.llmodel_model_create.argtypes = [ctypes.c_char_p]
@@ -125,6 +133,20 @@ llmodel.llmodel_threadCount.restype = ctypes.c_int32
llmodel.llmodel_set_implementation_search_path(MODEL_LIB_PATH.encode("utf-8"))
llmodel.llmodel_available_gpu_devices.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.POINTER(ctypes.c_int32)]
llmodel.llmodel_available_gpu_devices.restype = ctypes.POINTER(LLModelGPUDevice)
llmodel.llmodel_gpu_init_gpu_device_by_string.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_char_p]
llmodel.llmodel_gpu_init_gpu_device_by_string.restype = ctypes.c_bool
llmodel.llmodel_gpu_init_gpu_device_by_struct.argtypes = [ctypes.c_void_p, ctypes.POINTER(LLModelGPUDevice)]
llmodel.llmodel_gpu_init_gpu_device_by_struct.restype = ctypes.c_bool
llmodel.llmodel_gpu_init_gpu_device_by_int.argtypes = [ctypes.c_void_p, ctypes.c_int32]
llmodel.llmodel_gpu_init_gpu_device_by_int.restype = ctypes.c_bool
llmodel.llmodel_has_gpu_device.argtypes = [ctypes.c_void_p]
llmodel.llmodel_has_gpu_device.restype = ctypes.c_bool
ResponseCallbackType = Callable[[int, str], bool]
RawResponseCallbackType = Callable[[int, bytes], bool]
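
With these signatures declared, the new entry points can be exercised directly on a raw model handle, without the LLModel wrapper. A minimal sketch, assuming model is a handle obtained from llmodel_model_create and mem_required is the byte count reported by llmodel_required_mem:

    num_devices = ctypes.c_int32(0)
    devices = llmodel.llmodel_available_gpu_devices(model, mem_required, ctypes.byref(num_devices))
    for i in range(num_devices.value):
        dev = devices[i]  # an LLModelGPUDevice; name and vendor are C strings
        print(dev.index, dev.name.decode("utf-8"), dev.vendor.decode("utf-8"), dev.heapSize)

    # Bind the model to a device by name, then confirm the binding took effect.
    if llmodel.llmodel_gpu_init_gpu_device_by_string(model, mem_required, b"gpu"):
        assert llmodel.llmodel_has_gpu_device(model)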
@@ -169,6 +191,60 @@ class LLModel:
        else:
            raise ValueError("Unable to instantiate model")
    def list_gpu(self, model_path: str) -> list:
        """
        Lists available GPU devices that satisfy the model's memory requirements.

        Parameters
        ----------
        model_path : str
            Path to the model.

        Returns
        -------
        list
            A list of LLModelGPUDevice structures representing available GPU devices.
        """
        if self.model is not None:
            model_path_enc = model_path.encode("utf-8")
            mem_required = llmodel.llmodel_required_mem(self.model, model_path_enc)
        else:
            mem_required = self.memory_needed(model_path)
        num_devices = ctypes.c_int32(0)
        devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, mem_required, ctypes.byref(num_devices))
        if not devices_ptr:
            raise ValueError("Unable to retrieve available GPU devices")
        devices = [devices_ptr[i] for i in range(num_devices.value)]
        return devices
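
A usage sketch for list_gpu (the model path is illustrative):

    llm = LLModel()
    # Hypothetical local path; any supported model file works here.
    for dev in llm.list_gpu("/path/to/model.bin"):
        print(dev.name.decode("utf-8"), dev.heapSize // (1024 ** 2), "MiB heap")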
    def init_gpu(self, model_path: str, device: str):
        if self.model is not None:
            model_path_enc = model_path.encode("utf-8")
            mem_required = llmodel.llmodel_required_mem(self.model, model_path_enc)
        else:
            mem_required = self.memory_needed(model_path)
        device_enc = device.encode("utf-8")
        success = self.llmodel_lib.llmodel_gpu_init_gpu_device_by_string(self.model, mem_required, device_enc)
        if not success:
            # Retrieve all GPUs without considering memory requirements.
            num_devices = ctypes.c_int32(0)
            all_devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, 0, ctypes.byref(num_devices))
            if not all_devices_ptr:
                raise ValueError("Unable to retrieve list of all GPU devices")
            all_gpus = [all_devices_ptr[i].name.decode("utf-8") for i in range(num_devices.value)]
            # Retrieve GPUs that meet the memory requirements using list_gpu
            available_gpus = [gpu.name.decode("utf-8") for gpu in self.list_gpu(model_path)]
            # Identify GPUs that are unavailable due to insufficient memory or features
            unavailable_gpus = set(all_gpus) - set(available_gpus)
            # Formulate the error message
            error_msg = "Unable to initialize model on GPU: '{}'.".format(device)
            error_msg += "\nAvailable GPUs: {}.".format(available_gpus)
            error_msg += "\nUnavailable GPUs due to insufficient memory or features: {}.".format(unavailable_gpus)
            raise ValueError(error_msg)
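
Since init_gpu raises ValueError when the requested device cannot be initialized, callers can catch the error and fall back to CPU execution; a minimal sketch (paths illustrative):

    llm = LLModel()
    try:
        llm.init_gpu(model_path="/path/to/model.bin", device="gpu")
    except ValueError as e:
        print(e)  # lists available GPUs and those skipped for lack of memory
    llm.load_model("/path/to/model.bin")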
    def load_model(self, model_path: str) -> bool:
        """
        Load model from a file.