Nomic Vulkan backend licensed under the Software for Open Models License (SOM), version 1.0.

Adam Treat
2023-08-30 09:43:56 -04:00
parent d55cbbee32
commit 987546c63b
13 changed files with 512 additions and 5 deletions

View File

@@ -66,6 +66,7 @@ class GPT4All:
        model_type: Optional[str] = None,
        allow_download: bool = True,
        n_threads: Optional[int] = None,
        device: Optional[str] = "cpu",
    ):
        """
        Constructor
@@ -78,11 +79,22 @@ class GPT4All:
                descriptive identifier for user. Default is None.
            allow_download: Allow API to download models from gpt4all.io. Default is True.
            n_threads: number of CPU threads used by GPT4All. Default is None, in which case the number of threads is determined automatically.
            device: The processing unit on which the GPT4All model will run. It can be set to:
                - "cpu": Model will run on the central processing unit.
                - "gpu": Model will run on the best available graphics processing unit, irrespective of its vendor.
                - "amd", "nvidia", "intel": Model will run on the best available GPU from the specified vendor.
                Alternatively, a specific GPU name can be provided, and the model will run on the GPU that matches the name if it is available.
                Default is "cpu".
                Note: If the selected GPU device does not have sufficient RAM to accommodate the model, an error will be thrown, and the GPT4All instance will be rendered invalid. It is advised to ensure the device has enough memory before initializing the model.
        """
        self.model_type = model_type
        self.model = pyllmodel.LLModel()
        # Retrieve model and download if allowed
        self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download)
        if device is not None:
            if device != "cpu":
                self.model.init_gpu(model_path=self.config["path"], device=device)
        self.model.load_model(self.config["path"])
        # Set n_threads
        if n_threads is not None:
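
A short usage sketch for the new device parameter (the model filename is illustrative; any model from gpt4all.io works):

    from gpt4all import GPT4All

    # Request the best available GPU; the constructor initializes the device
    # before load_model() runs on the same path.
    model = GPT4All("ggml-model-gpt4all-falcon-q4_0.bin", device="gpu")

    # Vendor selection and CPU execution follow the same pattern:
    # GPT4All("ggml-model-gpt4all-falcon-q4_0.bin", device="nvidia")
    # GPT4All("ggml-model-gpt4all-falcon-q4_0.bin", device="cpu")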

View File

@@ -70,6 +70,14 @@ class LLModelPromptContext(ctypes.Structure):
("context_erase", ctypes.c_float),
]
class LLModelGPUDevice(ctypes.Structure):
    _fields_ = [
        ("index", ctypes.c_int32),
        ("type", ctypes.c_int32),
        ("heapSize", ctypes.c_size_t),
        ("name", ctypes.c_char_p),
        ("vendor", ctypes.c_char_p),
    ]
# Define C function signatures using ctypes
llmodel.llmodel_model_create.argtypes = [ctypes.c_char_p]
@@ -125,6 +133,20 @@ llmodel.llmodel_threadCount.restype = ctypes.c_int32
llmodel.llmodel_set_implementation_search_path(MODEL_LIB_PATH.encode("utf-8"))
llmodel.llmodel_available_gpu_devices.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.POINTER(ctypes.c_int32)]
llmodel.llmodel_available_gpu_devices.restype = ctypes.POINTER(LLModelGPUDevice)
llmodel.llmodel_gpu_init_gpu_device_by_string.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_char_p]
llmodel.llmodel_gpu_init_gpu_device_by_string.restype = ctypes.c_bool
llmodel.llmodel_gpu_init_gpu_device_by_struct.argtypes = [ctypes.c_void_p, ctypes.POINTER(LLModelGPUDevice)]
llmodel.llmodel_gpu_init_gpu_device_by_struct.restype = ctypes.c_bool
llmodel.llmodel_gpu_init_gpu_device_by_int.argtypes = [ctypes.c_void_p, ctypes.c_int32]
llmodel.llmodel_gpu_init_gpu_device_by_int.restype = ctypes.c_bool
llmodel.llmodel_has_gpu_device.argtypes = [ctypes.c_void_p]
llmodel.llmodel_has_gpu_device.restype = ctypes.c_bool
ResponseCallbackType = Callable[[int, str], bool]
RawResponseCallbackType = Callable[[int, bytes], bool]
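
With these signatures declared, the new entry points can be exercised directly on a raw model handle, without the LLModel wrapper. A minimal sketch, assuming model is a handle obtained from llmodel_model_create and mem_required is the byte count reported by llmodel_required_mem:

    num_devices = ctypes.c_int32(0)
    devices = llmodel.llmodel_available_gpu_devices(model, mem_required, ctypes.byref(num_devices))
    for i in range(num_devices.value):
        dev = devices[i]  # an LLModelGPUDevice; name and vendor are C strings
        print(dev.index, dev.name.decode("utf-8"), dev.vendor.decode("utf-8"), dev.heapSize)

    # Bind the model to a device by name, then confirm the binding took effect.
    if llmodel.llmodel_gpu_init_gpu_device_by_string(model, mem_required, b"gpu"):
        assert llmodel.llmodel_has_gpu_device(model)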
@@ -169,6 +191,60 @@ class LLModel:
        else:
            raise ValueError("Unable to instantiate model")
    def list_gpu(self, model_path: str) -> list:
        """
        Lists available GPU devices that satisfy the model's memory requirements.

        Parameters
        ----------
        model_path : str
            Path to the model.

        Returns
        -------
        list
            A list of LLModelGPUDevice structures representing available GPU devices.
        """
        if self.model is not None:
            model_path_enc = model_path.encode("utf-8")
            mem_required = llmodel.llmodel_required_mem(self.model, model_path_enc)
        else:
            mem_required = self.memory_needed(model_path)
        num_devices = ctypes.c_int32(0)
        devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, mem_required, ctypes.byref(num_devices))
        if not devices_ptr:
            raise ValueError("Unable to retrieve available GPU devices")
        devices = [devices_ptr[i] for i in range(num_devices.value)]
        return devices
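
A usage sketch for list_gpu (the model path is illustrative):

    llm = LLModel()
    # Hypothetical local path; any supported model file works here.
    for dev in llm.list_gpu("/path/to/model.bin"):
        print(dev.name.decode("utf-8"), dev.heapSize // (1024 ** 2), "MiB heap")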
    def init_gpu(self, model_path: str, device: str):
        if self.model is not None:
            model_path_enc = model_path.encode("utf-8")
            mem_required = llmodel.llmodel_required_mem(self.model, model_path_enc)
        else:
            mem_required = self.memory_needed(model_path)
        device_enc = device.encode("utf-8")
        success = self.llmodel_lib.llmodel_gpu_init_gpu_device_by_string(self.model, mem_required, device_enc)
        if not success:
            # Retrieve all GPUs without considering memory requirements.
            num_devices = ctypes.c_int32(0)
            all_devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, 0, ctypes.byref(num_devices))
            if not all_devices_ptr:
                raise ValueError("Unable to retrieve list of all GPU devices")
            all_gpus = [all_devices_ptr[i].name.decode("utf-8") for i in range(num_devices.value)]
            # Retrieve GPUs that meet the memory requirements using list_gpu
            available_gpus = [gpu.name.decode("utf-8") for gpu in self.list_gpu(model_path)]
            # Identify GPUs that are unavailable due to insufficient memory or features
            unavailable_gpus = set(all_gpus) - set(available_gpus)
            # Formulate the error message
            error_msg = "Unable to initialize model on GPU: '{}'.".format(device)
            error_msg += "\nAvailable GPUs: {}.".format(available_gpus)
            error_msg += "\nUnavailable GPUs due to insufficient memory or features: {}.".format(unavailable_gpus)
            raise ValueError(error_msg)
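
Since init_gpu raises ValueError when the requested device cannot be initialized, callers can catch the error and fall back to CPU execution; a minimal sketch (paths illustrative):

    llm = LLModel()
    try:
        llm.init_gpu(model_path="/path/to/model.bin", device="gpu")
    except ValueError as e:
        print(e)  # lists available GPUs and those skipped for lack of memory
    llm.load_model("/path/to/model.bin")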
    def load_model(self, model_path: str) -> bool:
        """
        Load model from a file.