mirror of https://github.com/nomic-ai/gpt4all.git
support the llama.cpp CUDA backend (#2310)
* rebase onto llama.cpp commit ggerganov/llama.cpp@d46dbc76f
* support for CUDA backend (enabled by default)
* partial support for Occam's Vulkan backend (disabled by default)
* partial support for HIP/ROCm backend (disabled by default)
* sync llama.cpp.cmake with upstream llama.cpp CMakeLists.txt
* changes to GPT4All backend, bindings, and chat UI to handle choice of llama.cpp backend (Kompute or CUDA)
* ship CUDA runtime with installed version
* make device selection in the UI on macOS actually do something
* model whitelist: remove dbrx, mamba, persimmon, plamo; add internlm and starcoder2

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
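The heart of the change is visible in chatllm.cpp below: LLModel::Implementation::construct() now takes the name of a llama.cpp backend ("auto", "cpu", "metal", or "cuda" in this diff) instead of a build variant, and the chat UI derives that name from the device the user selected. A minimal standalone sketch of that derivation, mirroring the #ifdef logic added below (the helper function itself is hypothetical, not part of the commit; __APPLE__ stands in for Qt's Q_OS_MAC):

// Sketch only: mirrors the backend choice made in ChatLLM::loadModel() below.
#include <string>

std::string chooseBackend(const std::string &requestedDevice, bool forceMetal)
{
#ifdef __APPLE__
    if (requestedDevice == "CPU")
        return "cpu";
#ifdef __aarch64__
    if (forceMetal)
        return "metal";  // only Apple Silicon builds can force Metal
#endif
#else
    if (requestedDevice.rfind("CUDA: ", 0) == 0)  // CUDA devices are listed with this prefix
        return "cuda";
#endif
    return "auto";  // let the backend pick; Kompute devices are tried first
}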
--- a/gpt4all-chat/CMakeLists.txt
+++ b/gpt4all-chat/CMakeLists.txt
@@ -17,8 +17,8 @@ if(APPLE)
 endif()
 
 set(APP_VERSION_MAJOR 2)
-set(APP_VERSION_MINOR 7)
-set(APP_VERSION_PATCH 6)
+set(APP_VERSION_MINOR 8)
+set(APP_VERSION_PATCH 0)
 set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
 
 # Include the binary directory for the generated header file
@@ -65,7 +65,7 @@ add_subdirectory(../gpt4all-backend llmodel)
 
 set(METAL_SHADER_FILE)
 if(${CMAKE_SYSTEM_NAME} MATCHES Darwin)
-    set(METAL_SHADER_FILE ../gpt4all-backend/llama.cpp-mainline/ggml-metal.metal)
+    set(METAL_SHADER_FILE ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib)
 endif()
 
 set(APP_ICON_RESOURCE)
@@ -185,7 +185,6 @@ if(METAL_SHADER_FILE)
     set_target_properties(chat PROPERTIES
         RESOURCE ${METAL_SHADER_FILE}
     )
-    configure_file(${METAL_SHADER_FILE} bin/ggml-metal.metal COPYONLY)
 endif()
 
 target_compile_definitions(chat
@@ -207,18 +206,61 @@ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
 endif()
 
 install(TARGETS chat DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN})
-install(TARGETS llmodel DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
+install(
+    TARGETS llmodel
+    LIBRARY DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .so/.dylib
+    RUNTIME DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN} # .dll
+)
 
 # We should probably iterate through the list of the cmake for backend, but these need to be installed
 # to the this component's dir for the finicky qt installer to work
-install(TARGETS gptj-avxonly DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
-install(TARGETS gptj-default DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
-install(TARGETS llama-mainline-avxonly DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
-install(TARGETS llama-mainline-default DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
-install(TARGETS llamamodel-mainline-avxonly DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
-install(TARGETS llamamodel-mainline-default DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
-if(APPLE)
-install(TARGETS llamamodel-mainline-metal DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
-endif()
+if (LLMODEL_KOMPUTE)
+    set(MODEL_IMPL_TARGETS
+        llamamodel-mainline-kompute
+        llamamodel-mainline-kompute-avxonly
+        gptj-kompute
+        gptj-kompute-avxonly
+    )
+else()
+    set(MODEL_IMPL_TARGETS
+        llamamodel-mainline-cpu
+        llamamodel-mainline-cpu-avxonly
+        gptj-cpu
+        gptj-cpu-avxonly
+    )
+endif()
+
+if (APPLE)
+    list(APPEND MODEL_IMPL_TARGETS llamamodel-mainline-metal)
+endif()
+
+install(
+    TARGETS ${MODEL_IMPL_TARGETS}
+    LIBRARY DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .so/.dylib
+    RUNTIME DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .dll
+)
+
+if (LLMODEL_CUDA)
+    set_property(TARGET llamamodel-mainline-cuda llamamodel-mainline-cuda-avxonly
+                 APPEND PROPERTY INSTALL_RPATH "$ORIGIN")
+
+    install(
+        TARGETS llamamodel-mainline-cuda
+                llamamodel-mainline-cuda-avxonly
+        RUNTIME_DEPENDENCY_SET llama-cuda-deps
+        LIBRARY DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .so/.dylib
+        RUNTIME DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .dll
+    )
+    if (WIN32)
+        install(
+            RUNTIME_DEPENDENCY_SET llama-cuda-deps
+            PRE_EXCLUDE_REGEXES "^(nvcuda|api-ms-.*)\\.dll$"
+            POST_INCLUDE_REGEXES "(^|[/\\\\])(lib)?(cuda|cublas)" POST_EXCLUDE_REGEXES .
+            DIRECTORIES "${CUDAToolkit_BIN_DIR}"
+            DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN}
+        )
+    endif()
+endif()
 
 set(CPACK_GENERATOR "IFW")
--- a/gpt4all-chat/build_and_run.md
+++ b/gpt4all-chat/build_and_run.md
@@ -6,9 +6,9 @@ gpt4all-chat from source.
 
 ## Prerequisites
 
-On Windows and Linux, building GPT4All requires the complete Vulkan SDK. You may download it from here: https://vulkan.lunarg.com/sdk/home
+You will need a compiler. On Windows, you should install Visual Studio with the C++ Development components. On macOS, you will need the full version of Xcode—Xcode Command Line Tools lacks certain required tools. On Linux, you will need a GCC or Clang toolchain with C++ support.
 
-macOS users do not need Vulkan, as GPT4All will use Metal instead.
+On Windows and Linux, building GPT4All with full GPU support requires the [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) and the latest [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads).
 
 ## Note for Linux users
 
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -143,7 +143,7 @@ void ChatLLM::handleThreadStarted()
 
 void ChatLLM::handleForceMetalChanged(bool forceMetal)
 {
-#if defined(Q_OS_MAC) && defined(__arm__)
+#if defined(Q_OS_MAC) && defined(__aarch64__)
     m_forceMetal = forceMetal;
     if (isModelLoaded() && m_shouldBeLoaded) {
         m_reloadingToChangeVariant = true;
@@ -324,19 +324,29 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     QElapsedTimer modelLoadTimer;
     modelLoadTimer.start();
 
+    auto requestedDevice = MySettings::globalInstance()->device();
     auto n_ctx = MySettings::globalInstance()->modelContextLength(modelInfo);
     m_ctx.n_ctx = n_ctx;
     auto ngl = MySettings::globalInstance()->modelGpuLayers(modelInfo);
 
-    std::string buildVariant = "auto";
-#if defined(Q_OS_MAC) && defined(__arm__)
-    if (m_forceMetal)
-        buildVariant = "metal";
+    std::string backend = "auto";
+#ifdef Q_OS_MAC
+    if (requestedDevice == "CPU") {
+        backend = "cpu";
+    } else if (m_forceMetal) {
+#ifdef __aarch64__
+        backend = "metal";
+#endif
+    }
+#else // !defined(Q_OS_MAC)
+    if (requestedDevice.startsWith("CUDA: "))
+        backend = "cuda";
 #endif
 
     QString constructError;
     m_llModelInfo.model.reset();
     try {
-        auto *model = LLModel::Implementation::construct(filePath.toStdString(), buildVariant, n_ctx);
+        auto *model = LLModel::Implementation::construct(filePath.toStdString(), backend, n_ctx);
         m_llModelInfo.model.reset(model);
     } catch (const LLModel::MissingImplementationError &e) {
         modelLoadProps.insert("error", "missing_model_impl");
@@ -378,6 +388,8 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         {
             const size_t requiredMemory = m_llModelInfo.model->requiredMem(filePath.toStdString(), n_ctx, ngl);
             availableDevices = m_llModelInfo.model->availableGPUDevices(requiredMemory);
+            // Pick the best device
+            // NB: relies on the fact that Kompute devices are listed first
            if (!availableDevices.empty() && availableDevices.front().type == 2 /*a discrete gpu*/) {
                 defaultDevice = &availableDevices.front();
                 float memGB = defaultDevice->heapSize / float(1024 * 1024 * 1024);
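The two comments added here document an existing heuristic rather than introduce a new one. A standalone sketch of the pick, assuming only the GPUDevice fields this hunk touches and that type == 2 marks a discrete GPU, as the inline comment says:

// Sketch, not commit code: the default-device pick relies on the backend
// listing Kompute devices first, so front() is the best candidate.
#include <cstddef>
#include <string>
#include <vector>

struct GPUDevice {
    int type;          // 2 == discrete GPU, per the /*a discrete gpu*/ check
    size_t heapSize;   // device memory in bytes
    std::string name;
};

const GPUDevice *pickDefaultDevice(const std::vector<GPUDevice> &devices)
{
    // availableGPUDevices() already filtered by required memory, so the
    // front device is the best candidate if it is a discrete GPU.
    if (!devices.empty() && devices.front().type == 2)
        return &devices.front();
    return nullptr;  // caller falls back to CPU
}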
@@ -387,16 +399,18 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             }
         }
 
-        const QString requestedDevice = MySettings::globalInstance()->device();
-        bool isMetal = m_llModelInfo.model->implementation().buildVariant() == "metal";
+        QString actualDevice("CPU");
 
-        // Pick the best match for the device
-        QString actualDevice = isMetal ? "Metal" : "CPU";
-        if (!isMetal && requestedDevice != "CPU") {
+#if defined(Q_OS_MAC) && defined(__aarch64__)
+        if (m_llModelInfo.model->implementation().buildVariant() == "metal")
+            actualDevice = "Metal";
+#else
+        if (requestedDevice != "CPU") {
             const auto *device = defaultDevice;
             if (requestedDevice != "Auto") {
                 // Use the selected device
                 for (const LLModel::GPUDevice &d : availableDevices) {
-                    if (QString::fromStdString(d.name) == requestedDevice) {
+                    if (QString::fromStdString(d.selectionName()) == requestedDevice) {
                         device = &d;
                         break;
                     }
@@ -409,14 +423,14 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             } else if (!m_llModelInfo.model->initializeGPUDevice(device->index, &unavail_reason)) {
                 emit reportFallbackReason(QString::fromStdString("<br>" + unavail_reason));
             } else {
-                actualDevice = QString::fromStdString(device->name);
+                actualDevice = QString::fromStdString(device->reportedName());
                 modelLoadProps.insert("requested_device_mem", approxDeviceMemGB(device));
             }
         }
+#endif
 
         // Report which device we're actually using
         emit reportDevice(actualDevice);
 
         bool success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, ngl);
 
         if (!m_shouldBeLoaded) {
--- a/gpt4all-chat/cmake/deploy-qt-linux.cmake.in
+++ b/gpt4all-chat/cmake/deploy-qt-linux.cmake.in
@@ -5,10 +5,7 @@ set(DATA_DIR ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN
 set(BIN_DIR ${DATA_DIR}/bin)
 set(Qt6_ROOT_DIR "@Qt6_ROOT_DIR@")
 set(ENV{LD_LIBRARY_PATH} "${BIN_DIR}:${Qt6_ROOT_DIR}/../lib/")
-execute_process(COMMAND ${LINUXDEPLOYQT} ${BIN_DIR}/chat -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -bundle-non-qt-libs -qmake=${Qt6_ROOT_DIR}/bin/qmake -verbose=2)
-file(GLOB MYLLMODELLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/*llmodel.*)
-file(COPY ${MYLLMODELLIBS}
-     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
+execute_process(COMMAND ${LINUXDEPLOYQT} ${BIN_DIR}/chat -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -bundle-non-qt-libs -qmake=${Qt6_ROOT_DIR}/bin/qmake -verbose=2 -exclude-libs=libcuda.so.1)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-32.png"
      DESTINATION ${DATA_DIR})
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-48.png"
--- a/gpt4all-chat/cmake/deploy-qt-mac.cmake.in
+++ b/gpt4all-chat/cmake/deploy-qt-mac.cmake.in
@@ -4,14 +4,11 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
 execute_process(COMMAND ${MACDEPLOYQT} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -verbose=2)
 file(GLOB MYGPTJLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libgptj*)
 file(GLOB MYLLAMALIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama*)
-file(GLOB MYBERTLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libbert*)
 file(GLOB MYLLMODELLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.*)
 file(COPY ${MYGPTJLIBS}
      DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
 file(COPY ${MYLLAMALIBS}
      DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
-file(COPY ${MYBERTLLIBS}
-     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
 file(COPY ${MYLLMODELLIBS}
      DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-32.png"
--- a/gpt4all-chat/cmake/deploy-qt-windows.cmake.in
+++ b/gpt4all-chat/cmake/deploy-qt-windows.cmake.in
@@ -2,9 +2,6 @@ set(WINDEPLOYQT "@WINDEPLOYQT@")
 set(COMPONENT_NAME_MAIN "@COMPONENT_NAME_MAIN@")
 set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
 execute_process(COMMAND ${WINDEPLOYQT} --qmldir ${CMAKE_CURRENT_SOURCE_DIR} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
-file(GLOB MYLLMODELLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/*llmodel.*)
-file(COPY ${MYLLMODELLIBS}
-     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-32.png"
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-48.png"
--- a/gpt4all-chat/mysettings.cpp
+++ b/gpt4all-chat/mysettings.cpp
@@ -65,10 +65,14 @@ MySettings::MySettings()
 {
     QSettings::setDefaultFormat(QSettings::IniFormat);
 
-    std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
     QVector<QString> deviceList{ "Auto" };
+#if defined(Q_OS_MAC) && defined(__aarch64__)
+    deviceList << "Metal";
+#else
+    std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
     for (LLModel::GPUDevice &d : devices)
-        deviceList << QString::fromStdString(d.name);
+        deviceList << QString::fromStdString(d.selectionName());
+#endif
     deviceList << "CPU";
     setDeviceList(deviceList);
 }
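For illustration, on a Linux machine with a single NVIDIA GPU the rebuilt list might read as follows. The "CUDA: " prefix is confirmed by the startsWith("CUDA: ") check in chatllm.cpp above; the "Vulkan: " spelling for the Kompute entry is an assumption, not taken from this commit:

// Hypothetical contents of MySettings' device list after this change.
#include <QString>
#include <QVector>

QVector<QString> exampleDeviceList()
{
    return {
        QStringLiteral("Auto"),                             // always offered first
        QStringLiteral("CUDA: NVIDIA GeForce RTX 3060"),    // CUDA backend (prefix per chatllm.cpp)
        QStringLiteral("Vulkan: NVIDIA GeForce RTX 3060"),  // same GPU via Kompute (assumed naming)
        QStringLiteral("CPU"),                              // always offered last
    };
}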
@@ -786,7 +790,23 @@ QString MySettings::device() const
 {
     QSettings setting;
     setting.sync();
-    return setting.value("device", default_device).toString();
+    auto value = setting.value("device");
+    if (!value.isValid())
+        return default_device;
+
+    auto device = value.toString();
+    if (!device.isEmpty()) {
+        auto deviceStr = device.toStdString();
+        auto newNameStr = LLModel::GPUDevice::updateSelectionName(deviceStr);
+        if (newNameStr != deviceStr) {
+            auto newName = QString::fromStdString(newNameStr);
+            qWarning() << "updating device name:" << device << "->" << newName;
+            device = newName;
+            setting.setValue("device", device);
+            setting.sync();
+        }
+    }
+    return device;
 }
 
 void MySettings::setDevice(const QString &u)