mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-06 11:00:48 +00:00
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0.
This commit is contained in:
@@ -20,7 +20,7 @@ endif()
|
||||
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
||||
set(LLMODEL_VERSION_MAJOR 0)
|
||||
set(LLMODEL_VERSION_MINOR 3)
|
||||
set(LLMODEL_VERSION_MINOR 4)
|
||||
set(LLMODEL_VERSION_PATCH 0)
|
||||
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
|
||||
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
|
||||
@@ -39,6 +39,8 @@ else()
|
||||
message(STATUS "Interprocedural optimization support detected")
|
||||
endif()
|
||||
|
||||
set(LLAMA_KOMPUTE YES)
|
||||
|
||||
include(llama.cpp.cmake)
|
||||
|
||||
set(BUILD_VARIANTS default avxonly)
|
||||
|
Submodule gpt4all-backend/llama.cpp-mainline updated: acfc5478ff...4cdaa3c9cb
@@ -1,3 +1,11 @@
|
||||
#
|
||||
# Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
#
|
||||
# This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
# version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
# this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
#
|
||||
|
||||
cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
@@ -145,6 +153,129 @@ if (LLAMA_OPENBLAS)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (LLAMA_KOMPUTE)
|
||||
find_package(Vulkan COMPONENTS glslc REQUIRED)
|
||||
find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
|
||||
if (NOT glslc_executable)
|
||||
message(FATAL_ERROR "glslc not found")
|
||||
endif()
|
||||
|
||||
set(LLAMA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp-mainline)
|
||||
|
||||
function(compile_shader)
|
||||
set(options)
|
||||
set(oneValueArgs)
|
||||
set(multiValueArgs SOURCES)
|
||||
cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
|
||||
foreach(source ${compile_shader_SOURCES})
|
||||
get_filename_component(OP_FILE ${source} NAME)
|
||||
set(spv_file ${CMAKE_CURRENT_BINARY_DIR}/${OP_FILE}.spv)
|
||||
add_custom_command(
|
||||
OUTPUT ${spv_file}
|
||||
DEPENDS ${LLAMA_DIR}/${source}
|
||||
COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${LLAMA_DIR}/${source}
|
||||
COMMENT "Compiling ${source} to ${source}.spv"
|
||||
)
|
||||
|
||||
get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
|
||||
set(FILE_NAME "shader${RAW_FILE_NAME}")
|
||||
string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
|
||||
string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
|
||||
string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
|
||||
set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
|
||||
message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
|
||||
add_custom_command(
|
||||
OUTPUT ${OUTPUT_HEADER_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
|
||||
COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${spv_file} >> ${OUTPUT_HEADER_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
||||
DEPENDS ${spv_file} xxd
|
||||
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
|
||||
)
|
||||
endforeach()
|
||||
endfunction()
|
||||
|
||||
if (EXISTS "${LLAMA_DIR}/kompute/CMakeLists.txt")
|
||||
message(STATUS "Kompute found")
|
||||
add_subdirectory(${LLAMA_DIR}/kompute)
|
||||
|
||||
# Compile our shaders
|
||||
compile_shader(SOURCES
|
||||
kompute/op_scale.comp
|
||||
kompute/op_add.comp
|
||||
kompute/op_addrow.comp
|
||||
kompute/op_mul.comp
|
||||
kompute/op_mulrow.comp
|
||||
kompute/op_silu.comp
|
||||
kompute/op_relu.comp
|
||||
kompute/op_gelu.comp
|
||||
kompute/op_softmax.comp
|
||||
kompute/op_norm.comp
|
||||
kompute/op_rmsnorm.comp
|
||||
kompute/op_diagmask.comp
|
||||
kompute/op_mul_mat_f16.comp
|
||||
kompute/op_mul_mat_q4_0.comp
|
||||
kompute/op_mul_mat_q4_1.comp
|
||||
kompute/op_getrows_f16.comp
|
||||
kompute/op_getrows_q4_0.comp
|
||||
kompute/op_getrows_q4_1.comp
|
||||
kompute/op_rope.comp
|
||||
kompute/op_cpy_f16_f16.comp
|
||||
kompute/op_cpy_f16_f32.comp
|
||||
kompute/op_cpy_f32_f16.comp
|
||||
kompute/op_cpy_f32_f32.comp
|
||||
)
|
||||
|
||||
# Create a custom target for our generated shaders
|
||||
add_custom_target(generated_shaders DEPENDS
|
||||
shaderop_scale.h
|
||||
shaderop_add.h
|
||||
shaderop_addrow.h
|
||||
shaderop_mul.h
|
||||
shaderop_mulrow.h
|
||||
shaderop_silu.h
|
||||
shaderop_relu.h
|
||||
shaderop_gelu.h
|
||||
shaderop_softmax.h
|
||||
shaderop_norm.h
|
||||
shaderop_rmsnorm.h
|
||||
shaderop_diagmask.h
|
||||
shaderop_mul_mat_f16.h
|
||||
shaderop_mul_mat_q4_0.h
|
||||
shaderop_mul_mat_q4_1.h
|
||||
shaderop_getrows_f16.h
|
||||
shaderop_getrows_q4_0.h
|
||||
shaderop_getrows_q4_1.h
|
||||
shaderop_rope.h
|
||||
shaderop_cpy_f16_f16.h
|
||||
shaderop_cpy_f16_f32.h
|
||||
shaderop_cpy_f32_f16.h
|
||||
shaderop_cpy_f32_f32.h
|
||||
)
|
||||
|
||||
# Create a custom command that depends on the generated_shaders
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan.stamp
|
||||
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan.stamp
|
||||
DEPENDS generated_shaders
|
||||
COMMENT "Ensuring shaders are generated before compiling ggml-vulkan.cpp"
|
||||
)
|
||||
|
||||
# Add the stamp to the main sources to ensure dependency tracking
|
||||
set(GGML_SOURCES_KOMPUTE ${LLAMA_DIR}/ggml-vulkan.cpp ${LLAMA_DIR}/ggml-vulkan.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan.stamp)
|
||||
add_compile_definitions(GGML_USE_KOMPUTE)
|
||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
|
||||
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
|
||||
else()
|
||||
message(WARNING "Kompute not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (LLAMA_ALL_WARNINGS)
|
||||
if (NOT MSVC)
|
||||
set(c_flags
|
||||
@@ -301,7 +432,8 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA)
|
||||
${GGML_SOURCES_QUANT_K}
|
||||
${GGML_SOURCES_CUDA}
|
||||
${GGML_METAL_SOURCES}
|
||||
${GGML_OPENCL_SOURCES})
|
||||
${GGML_OPENCL_SOURCES}
|
||||
${GGML_SOURCES_KOMPUTE})
|
||||
|
||||
if (LLAMA_K_QUANTS)
|
||||
target_compile_definitions(ggml${SUFFIX} PUBLIC GGML_USE_K_QUANTS)
|
||||
|
@@ -28,6 +28,9 @@
|
||||
#include <llama.h>
|
||||
#include <ggml.h>
|
||||
|
||||
#ifdef GGML_USE_KOMPUTE
|
||||
#include "ggml-vulkan.h"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
const char *modelType_ = "LLaMA";
|
||||
@@ -155,6 +158,13 @@ bool LLamaModel::loadModel(const std::string &modelPath)
|
||||
// currently
|
||||
d_ptr->params.n_gpu_layers = 1;
|
||||
#endif
|
||||
#ifdef GGML_USE_KOMPUTE
|
||||
if (ggml_vk_has_device()) {
|
||||
// vulkan always runs the whole model if n_gpu_layers is not 0, at least
|
||||
// currently
|
||||
d_ptr->params.n_gpu_layers = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params);
|
||||
if (!d_ptr->ctx) {
|
||||
@@ -162,6 +172,12 @@ bool LLamaModel::loadModel(const std::string &modelPath)
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef GGML_USE_KOMPUTE
|
||||
if (ggml_vk_has_device()) {
|
||||
std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
||||
d_ptr->modelLoaded = true;
|
||||
fflush(stderr);
|
||||
@@ -252,6 +268,75 @@ const std::vector<LLModel::Token> &LLamaModel::endTokens() const
|
||||
return fres;
|
||||
}
|
||||
|
||||
#if defined(GGML_USE_KOMPUTE)
|
||||
#include "ggml-vulkan.h"
|
||||
#endif
|
||||
|
||||
std::vector<LLModel::GPUDevice> LLamaModel::availableGPUDevices(size_t memoryRequired)
|
||||
{
|
||||
#if defined(GGML_USE_KOMPUTE)
|
||||
std::vector<ggml_vk_device> vkDevices = ggml_vk_available_devices(memoryRequired);
|
||||
|
||||
std::vector<LLModel::GPUDevice> devices;
|
||||
for(const auto& vkDevice : vkDevices) {
|
||||
LLModel::GPUDevice device;
|
||||
device.index = vkDevice.index;
|
||||
device.type = vkDevice.type;
|
||||
device.heapSize = vkDevice.heapSize;
|
||||
device.name = vkDevice.name;
|
||||
device.vendor = vkDevice.vendor;
|
||||
|
||||
devices.push_back(device);
|
||||
}
|
||||
|
||||
return devices;
|
||||
#else
|
||||
return std::vector<LLModel::GPUDevice>();
|
||||
#endif
|
||||
}
|
||||
|
||||
bool LLamaModel::initializeGPUDevice(size_t memoryRequired, const std::string& device)
|
||||
{
|
||||
#if defined(GGML_USE_KOMPUTE)
|
||||
return ggml_vk_init_device(memoryRequired, device);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool LLamaModel::initializeGPUDevice(const LLModel::GPUDevice &device)
|
||||
{
|
||||
#if defined(GGML_USE_KOMPUTE)
|
||||
ggml_vk_device vkDevice;
|
||||
vkDevice.index = device.index;
|
||||
vkDevice.type = device.type;
|
||||
vkDevice.heapSize = device.heapSize;
|
||||
vkDevice.name = device.name;
|
||||
vkDevice.vendor = device.vendor;
|
||||
return ggml_vk_init_device(vkDevice);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool LLamaModel::initializeGPUDevice(int device)
|
||||
{
|
||||
#if defined(GGML_USE_KOMPUTE)
|
||||
return ggml_vk_init_device(device);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool LLamaModel::hasGPUDevice()
|
||||
{
|
||||
#if defined(GGML_USE_KOMPUTE)
|
||||
return ggml_vk_has_device();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define DLL_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
|
@@ -25,6 +25,11 @@ public:
|
||||
size_t restoreState(const uint8_t *src) override;
|
||||
void setThreadCount(int32_t n_threads) override;
|
||||
int32_t threadCount() const override;
|
||||
std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) override;
|
||||
bool initializeGPUDevice(size_t memoryRequired, const std::string& device) override;
|
||||
bool initializeGPUDevice(const GPUDevice &device) override;
|
||||
bool initializeGPUDevice(int device) override;
|
||||
bool hasGPUDevice() override;
|
||||
|
||||
private:
|
||||
LLamaPrivate *d_ptr;
|
||||
|
@@ -58,6 +58,14 @@ public:
|
||||
// window
|
||||
};
|
||||
|
||||
struct GPUDevice {
|
||||
int index = 0;
|
||||
int type = 0;
|
||||
size_t heapSize = 0;
|
||||
std::string name;
|
||||
std::string vendor;
|
||||
};
|
||||
|
||||
explicit LLModel() {}
|
||||
virtual ~LLModel() {}
|
||||
|
||||
@@ -87,6 +95,12 @@ public:
|
||||
return *m_implementation;
|
||||
}
|
||||
|
||||
virtual std::vector<GPUDevice> availableGPUDevices(size_t /*memoryRequired*/) { return std::vector<GPUDevice>(); }
|
||||
virtual bool initializeGPUDevice(size_t /*memoryRequired*/, const std::string& /*device*/) { return false; }
|
||||
virtual bool initializeGPUDevice(const GPUDevice &/*device*/) { return false; }
|
||||
virtual bool initializeGPUDevice(int /*device*/) { return false; }
|
||||
virtual bool hasGPUDevice() { return false; }
|
||||
|
||||
protected:
|
||||
// These are pure virtual because subclasses need to implement as the default implementation of
|
||||
// 'prompt' above calls these functions
|
||||
|
@@ -5,7 +5,6 @@
|
||||
#include <cerrno>
|
||||
#include <utility>
|
||||
|
||||
|
||||
struct LLModelWrapper {
|
||||
LLModel *llModel = nullptr;
|
||||
LLModel::PromptContext promptContext;
|
||||
@@ -210,3 +209,57 @@ const char *llmodel_get_implementation_search_path()
|
||||
{
|
||||
return LLModel::Implementation::implementationsSearchPath().c_str();
|
||||
}
|
||||
|
||||
struct llmodel_gpu_device* llmodel_available_gpu_devices(llmodel_model model, size_t memoryRequired, int* num_devices)
|
||||
{
|
||||
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
|
||||
std::vector<LLModel::GPUDevice> devices = wrapper->llModel->availableGPUDevices(memoryRequired);
|
||||
|
||||
// Set the num_devices
|
||||
*num_devices = devices.size();
|
||||
|
||||
if (*num_devices == 0) return nullptr; // Return nullptr if no devices are found
|
||||
|
||||
// Allocate memory for the output array
|
||||
struct llmodel_gpu_device* output = (struct llmodel_gpu_device*) malloc(*num_devices * sizeof(struct llmodel_gpu_device));
|
||||
|
||||
for (int i = 0; i < *num_devices; i++) {
|
||||
output[i].index = devices[i].index;
|
||||
output[i].type = devices[i].type;
|
||||
output[i].heapSize = devices[i].heapSize;
|
||||
output[i].name = strdup(devices[i].name.c_str()); // Convert std::string to char* and allocate memory
|
||||
output[i].vendor = strdup(devices[i].vendor.c_str()); // Convert std::string to char* and allocate memory
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
bool llmodel_gpu_init_gpu_device_by_string(llmodel_model model, size_t memoryRequired, const char *device)
|
||||
{
|
||||
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
|
||||
return wrapper->llModel->initializeGPUDevice(memoryRequired, std::string(device));
|
||||
}
|
||||
|
||||
bool llmodel_gpu_init_gpu_device_by_struct(llmodel_model model, const llmodel_gpu_device *device)
|
||||
{
|
||||
LLModel::GPUDevice d;
|
||||
d.index = device->index;
|
||||
d.type = device->type;
|
||||
d.heapSize = device->heapSize;
|
||||
d.name = device->name;
|
||||
d.vendor = device->vendor;
|
||||
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
|
||||
return wrapper->llModel->initializeGPUDevice(d);
|
||||
}
|
||||
|
||||
bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)
|
||||
{
|
||||
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
|
||||
return wrapper->llModel->initializeGPUDevice(device);
|
||||
}
|
||||
|
||||
bool llmodel_has_gpu_device(llmodel_model model)
|
||||
{
|
||||
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
|
||||
return wrapper->llModel->hasGPUDevice();
|
||||
}
|
||||
|
@@ -56,8 +56,18 @@ struct llmodel_prompt_context {
|
||||
int32_t repeat_last_n; // last n tokens to penalize
|
||||
float context_erase; // percent of context to erase if we exceed the context window
|
||||
};
|
||||
|
||||
struct llmodel_gpu_device {
|
||||
int index = 0;
|
||||
int type = 0; // same as VkPhysicalDeviceType
|
||||
size_t heapSize = 0;
|
||||
const char * name;
|
||||
const char * vendor;
|
||||
};
|
||||
|
||||
#ifndef __cplusplus
|
||||
typedef struct llmodel_prompt_context llmodel_prompt_context;
|
||||
typedef struct llmodel_gpu_device llmodel_gpu_device;
|
||||
#endif
|
||||
|
||||
/**
|
||||
@@ -218,6 +228,50 @@ void llmodel_set_implementation_search_path(const char *path);
|
||||
*/
|
||||
const char *llmodel_get_implementation_search_path();
|
||||
|
||||
/**
|
||||
* Get a list of available GPU devices given the memory required.
|
||||
* @return A pointer to an array of llmodel_gpu_device's whose number is given by num_devices.
|
||||
*/
|
||||
struct llmodel_gpu_device* llmodel_available_gpu_devices(llmodel_model model, size_t memoryRequired, int* num_devices);
|
||||
|
||||
/**
|
||||
* Initializes a GPU device based on a specified string criterion.
|
||||
*
|
||||
* This function initializes a GPU device based on a string identifier provided. The function
|
||||
* allows initialization based on general device type ("gpu"), vendor name ("amd", "nvidia", "intel"),
|
||||
* or any specific device name.
|
||||
*
|
||||
* @param memoryRequired The amount of memory (in bytes) required by the application or task
|
||||
* that will utilize the GPU device.
|
||||
* @param device A string specifying the desired criterion for GPU device selection. It can be:
|
||||
* - "gpu": To initialize the best available GPU.
|
||||
* - "amd", "nvidia", or "intel": To initialize the best available GPU from that vendor.
|
||||
* - A specific GPU device name: To initialize a GPU with that exact name.
|
||||
*
|
||||
* @return True if the GPU device is successfully initialized based on the provided string
|
||||
* criterion. Returns false if the desired GPU device could not be initialized.
|
||||
*/
|
||||
bool llmodel_gpu_init_gpu_device_by_string(llmodel_model model, size_t memoryRequired, const char *device);
|
||||
|
||||
/**
|
||||
* Initializes a GPU device by specifying a valid gpu device pointer.
|
||||
* @param device A gpu device pointer.
|
||||
* @return True if the GPU device is successfully initialized, false otherwise.
|
||||
*/
|
||||
bool llmodel_gpu_init_gpu_device_by_struct(llmodel_model model, const llmodel_gpu_device *device);
|
||||
|
||||
/**
|
||||
* Initializes a GPU device by its index.
|
||||
* @param device An integer representing the index of the GPU device to be initialized.
|
||||
* @return True if the GPU device is successfully initialized, false otherwise.
|
||||
*/
|
||||
bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device);
|
||||
|
||||
/**
|
||||
* @return True if a GPU device is successfully initialized, false otherwise.
|
||||
*/
|
||||
bool llmodel_has_gpu_device(llmodel_model model);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -4,6 +4,49 @@
|
||||
#include <vector>
|
||||
#include <ggml.h>
|
||||
|
||||
#if defined(GGML_USE_KOMPUTE)
|
||||
#include "ggml-vulkan.h"
|
||||
struct llm_buffer {
|
||||
uint8_t * addr = NULL;
|
||||
size_t size = 0;
|
||||
ggml_vk_memory memory;
|
||||
|
||||
llm_buffer() = default;
|
||||
|
||||
void resize(size_t size) {
|
||||
free();
|
||||
|
||||
if (!ggml_vk_has_device()) {
|
||||
this->addr = new uint8_t[size];
|
||||
this->size = size;
|
||||
} else {
|
||||
this->memory = ggml_vk_allocate(size);
|
||||
this->addr = (uint8_t*)memory.data;
|
||||
this->size = size;
|
||||
}
|
||||
}
|
||||
|
||||
void free() {
|
||||
if (!memory.primaryMemory) {
|
||||
delete[] addr;
|
||||
} else if (memory.data) {
|
||||
ggml_vk_free_memory(memory);
|
||||
}
|
||||
this->addr = NULL;
|
||||
this->size = 0;
|
||||
}
|
||||
|
||||
~llm_buffer() {
|
||||
free();
|
||||
}
|
||||
|
||||
// disable copy and move
|
||||
llm_buffer(const llm_buffer&) = delete;
|
||||
llm_buffer(llm_buffer&&) = delete;
|
||||
llm_buffer& operator=(const llm_buffer&) = delete;
|
||||
llm_buffer& operator=(llm_buffer&&) = delete;
|
||||
};
|
||||
#else
|
||||
struct llm_buffer {
|
||||
uint8_t * addr = NULL;
|
||||
size_t size = 0;
|
||||
@@ -18,6 +61,7 @@ struct llm_buffer {
|
||||
delete[] addr;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
struct llm_kv_cache {
|
||||
struct ggml_tensor * k;
|
||||
|
Reference in New Issue
Block a user