From b162b5c64edb4ae44ddd1eb0f798d2e152338b10 Mon Sep 17 00:00:00 2001
From: Adam Treat
Date: Fri, 9 Jun 2023 15:08:46 -0400
Subject: [PATCH] Revert "llama on Metal (#885)"

This reverts commit c55f81b860df04b32cc9557dbea85e2465877723.
---
 .gitmodules                        |  2 +-
 gpt4all-backend/CMakeLists.txt     | 58 ++++++++++----------------
 gpt4all-backend/llama.cpp-mainline |  2 +-
 gpt4all-backend/llama.cpp.cmake    | 67 +++++++++++++-----------------
 gpt4all-backend/llamamodel.cpp     | 31 +-------------
 gpt4all-backend/llmodel.cpp        | 30 +++++--------
 gpt4all-chat/CMakeLists.txt        | 12 ------
 7 files changed, 64 insertions(+), 138 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 74bf3c97..50de0692 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -6,4 +6,4 @@
 	url = https://github.com/manyoso/llama.cpp.git
 [submodule "llama.cpp-mainline"]
 	path = gpt4all-backend/llama.cpp-mainline
-	url = https://github.com/nomic-ai/llama.cpp.git
+	url = https://github.com/ggerganov/llama.cpp.git
diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index aab1e98d..ae33ad70 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -39,9 +39,6 @@ endif()
 include(llama.cpp.cmake)
 
 set(BUILD_VARIANTS default avxonly)
-if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-    set(BUILD_VARIANTS ${BUILD_VARIANTS} metal)
-endif()
 
 set(CMAKE_VERBOSE_MAKEFILE ON)
 
@@ -57,20 +54,10 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
     set(LLAMA_F16C ${GPT4ALL_ALLOW_NON_AVX})
     set(LLAMA_FMA ${GPT4ALL_ALLOW_NON_AVX})
 
-    if (BUILD_VARIANT STREQUAL metal)
-        set(LLAMA_K_QUANTS YES)
-        set(LLAMA_METAL YES)
-    else()
-        set(LLAMA_K_QUANTS NO)
-        set(LLAMA_METAL NO)
-    endif()
-
     # Include GGML
     include_ggml(llama.cpp-mainline -mainline-${BUILD_VARIANT} ON)
-    if (NOT LLAMA_METAL)
-        include_ggml(llama.cpp-230511 -230511-${BUILD_VARIANT} ON)
-        include_ggml(llama.cpp-230519 -230519-${BUILD_VARIANT} ON)
-    endif()
+    include_ggml(llama.cpp-230511 -230511-${BUILD_VARIANT} ON)
+    include_ggml(llama.cpp-230519 -230519-${BUILD_VARIANT} ON)
 
     # Function for preparing individual implementations
     function(prepare_target TARGET_NAME BASE_LIB)
@@ -95,30 +82,29 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
         LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
     prepare_target(llamamodel-mainline llama-mainline)
 
-    if (NOT LLAMA_METAL)
-        add_library(llamamodel-230519-${BUILD_VARIANT} SHARED
-            llamamodel.cpp llmodel_shared.cpp)
-        target_compile_definitions(llamamodel-230519-${BUILD_VARIANT} PRIVATE
-            LLAMA_VERSIONS===2 LLAMA_DATE=230519)
-        prepare_target(llamamodel-230519 llama-230519)
-        add_library(llamamodel-230511-${BUILD_VARIANT} SHARED
-            llamamodel.cpp llmodel_shared.cpp)
-        target_compile_definitions(llamamodel-230511-${BUILD_VARIANT} PRIVATE
-            LLAMA_VERSIONS=<=1 LLAMA_DATE=230511)
-        prepare_target(llamamodel-230511 llama-230511)
+    add_library(llamamodel-230519-${BUILD_VARIANT} SHARED
+        llamamodel.cpp llmodel_shared.cpp)
+    target_compile_definitions(llamamodel-230519-${BUILD_VARIANT} PRIVATE
+        LLAMA_VERSIONS===2 LLAMA_DATE=230519)
+    prepare_target(llamamodel-230519 llama-230519)
 
-        add_library(gptj-${BUILD_VARIANT} SHARED
-            gptj.cpp utils.h utils.cpp llmodel_shared.cpp)
-        prepare_target(gptj ggml-230511)
+    add_library(llamamodel-230511-${BUILD_VARIANT} SHARED
+        llamamodel.cpp llmodel_shared.cpp)
+    target_compile_definitions(llamamodel-230511-${BUILD_VARIANT} PRIVATE
+        LLAMA_VERSIONS=<=1 LLAMA_DATE=230511)
+    prepare_target(llamamodel-230511 llama-230511)
 
-        add_library(mpt-${BUILD_VARIANT} SHARED
-            mpt.cpp utils.h utils.cpp llmodel_shared.cpp)
-        prepare_target(mpt ggml-230511)
+    add_library(gptj-${BUILD_VARIANT} SHARED
+        gptj.cpp utils.h utils.cpp llmodel_shared.cpp)
+    prepare_target(gptj ggml-230511)
 
-        add_library(replit-${BUILD_VARIANT} SHARED
-            replit.cpp utils.h utils.cpp llmodel_shared.cpp)
-        prepare_target(replit ggml-230511)
-    endif()
+    add_library(mpt-${BUILD_VARIANT} SHARED
+        mpt.cpp utils.h utils.cpp llmodel_shared.cpp)
+    prepare_target(mpt ggml-230511)
+
+    add_library(replit-${BUILD_VARIANT} SHARED
+        replit.cpp utils.h utils.cpp llmodel_shared.cpp)
+    prepare_target(replit ggml-230511)
 endforeach()
 
 add_library(llmodel
diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline
index b33dee28..5b57a5b7 160000
--- a/gpt4all-backend/llama.cpp-mainline
+++ b/gpt4all-backend/llama.cpp-mainline
@@ -1 +1 @@
-Subproject commit b33dee282f5d8032b5f780152732dc45cbf2d349
+Subproject commit 5b57a5b72676540b6a45a3f527126299969ad241
diff --git a/gpt4all-backend/llama.cpp.cmake b/gpt4all-backend/llama.cpp.cmake
index c3dbf01a..01ded39d 100644
--- a/gpt4all-backend/llama.cpp.cmake
+++ b/gpt4all-backend/llama.cpp.cmake
@@ -34,7 +34,6 @@ endif()
 #
 # Option list
 #
-# some of the options here are commented out so they can be set "dynamically" before calling include_ggml()
 
 # general
 option(LLAMA_STATIC "llama: static link libraries" OFF)
@@ -69,7 +68,6 @@ option(LLAMA_OPENBLAS "llama: use OpenBLAS"
 #option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
 #option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
 #option(LLAMA_METAL "llama: use Metal" OFF)
-#option(LLAMA_K_QUANTS "llama: use k-quants" ON)
 set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
 set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
 set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
@@ -265,32 +263,10 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA)
     endif()
 
     set(GGML_SOURCES_QUANT_K )
-    set(GGML_METAL_SOURCES )
-    if (LLAMA_K_QUANTS)
+    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${DIRECTORY}/ggml-quants-k.h)
         set(GGML_SOURCES_QUANT_K
-            ${DIRECTORY}/k_quants.h
-            ${DIRECTORY}/k_quants.c)
-
-        if (LLAMA_METAL)
-            find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
-            find_library(METAL_FRAMEWORK Metal REQUIRED)
-            find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
-            find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
-
-            set(GGML_METAL_SOURCES ${DIRECTORY}/ggml-metal.m ${DIRECTORY}/ggml-metal.h)
-            # get full path to the file
-            #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
-
-            # copy ggml-metal.metal to bin directory
-            configure_file(${DIRECTORY}/ggml-metal.metal bin/ggml-metal.metal COPYONLY)
-
-            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
-                ${FOUNDATION_LIBRARY}
-                ${METAL_FRAMEWORK}
-                ${METALKIT_FRAMEWORK}
-                ${METALPERFORMANCE_FRAMEWORK}
-                )
-        endif()
+            ${DIRECTORY}/ggml-quants-k.h
+            ${DIRECTORY}/ggml-quants-k.c)
     endif()
 
     add_library(ggml${SUFFIX} OBJECT
@@ -298,16 +274,8 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA)
                 ${DIRECTORY}/ggml.h
                 ${GGML_SOURCES_QUANT_K}
                 ${GGML_SOURCES_CUDA}
-                ${GGML_METAL_SOURCES}
                 ${GGML_OPENCL_SOURCES})
 
-    if (LLAMA_K_QUANTS)
-        target_compile_definitions(ggml${SUFFIX} PUBLIC GGML_USE_K_QUANTS)
-    endif()
-
-    if (LLAMA_METAL AND GGML_METAL_SOURCES)
-        target_compile_definitions(ggml${SUFFIX} PUBLIC GGML_USE_METAL GGML_METAL_NDEBUG)
-    endif()
 
     target_include_directories(ggml${SUFFIX} PUBLIC ${DIRECTORY})
     target_compile_features(ggml${SUFFIX} PUBLIC c_std_11) # don't bump
@@ -327,9 +295,6 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA)
             ${DIRECTORY}/llama.h
             ${DIRECTORY}/${LLAMA_UTIL_SOURCE_FILE})
 
-    if (LLAMA_METAL AND GGML_METAL_SOURCES)
-        target_compile_definitions(llama${SUFFIX} PUBLIC GGML_USE_METAL GGML_METAL_NDEBUG)
-    endif()
 
     target_include_directories(llama${SUFFIX} PUBLIC ${DIRECTORY})
     target_compile_features(llama${SUFFIX} PUBLIC cxx_std_11) # don't bump
@@ -367,6 +332,32 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA)
         target_compile_definitions(ggml${SUFFIX} PRIVATE GGML_USE_CLBLAST)
     endif()
 
+    if (LLAMA_METAL)
+        find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
+        find_library(METAL_FRAMEWORK Metal REQUIRED)
+        find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
+        find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
+
+        set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
+
+        target_compile_definitions(llama${SUFFIX} PRIVATE
+            GGML_USE_METAL
+            GGML_METAL_NDEBUG)
+
+        # get full path to the file
+        #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
+
+        # copy ggml-metal.metal to bin directory
+        configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
+
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
+            ${FOUNDATION_LIBRARY}
+            ${METAL_FRAMEWORK}
+            ${METALKIT_FRAMEWORK}
+            ${METALPERFORMANCE_FRAMEWORK}
+            )
+    endif()
+
     if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
         message(STATUS "ARM detected")
         if (MSVC)
diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 17b55855..66aacac4 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -115,12 +115,6 @@ bool LLamaModel::loadModel(const std::string &modelPath)
 #if LLAMA_DATE <= 230511
     d_ptr->params.n_parts = params.n_parts;
 #endif
-#ifdef GGML_USE_METAL
-    std::cerr << "llama.cpp: using Metal" << std::endl;
-    // metal always runs the whole model if n_gpu_layers is not 0, at least
-    // currently
-    d_ptr->params.n_gpu_layers = 1;
-#endif
 
     d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params);
     if (!d_ptr->ctx) {
@@ -234,30 +228,7 @@ DLL_EXPORT bool magic_match(std::istream& f) {
     // Check version
     uint32_t version = 0;
     f.read(reinterpret_cast<char*>(&version), sizeof(version));
-    if (!(version LLAMA_VERSIONS)) {
-        return false;
-    }
-#ifdef GGML_USE_METAL
-    // Check quant supported on metal
-    // skip fields
-    off_t offset = sizeof(uint32_t) * 6; // n_vocab, n_embd, n_mult, n_head, n_layer, n_rot
-    f.seekg(offset, std::ios_base::cur);
-    uint32_t ftype;
-    f.read(reinterpret_cast<char*>(&ftype), sizeof(ftype)); // ftype
-    switch((enum llama_ftype) ftype) {
-        // currently supported on Metal https://github.com/ggerganov/llama.cpp/blob/ae9663f1887513e152839e91f61c513075a19422/ggml-metal.m#L51-L55
-        case LLAMA_FTYPE_MOSTLY_F16:
-        case LLAMA_FTYPE_MOSTLY_Q2_K:
-        case LLAMA_FTYPE_MOSTLY_Q4_0:
-        case LLAMA_FTYPE_MOSTLY_Q6_K:
-        case LLAMA_FTYPE_MOSTLY_Q4_K_S:
-        case LLAMA_FTYPE_MOSTLY_Q4_K_M:
-            return true;
-        default: // unsupported quant-type for Metal
-            return false;
-    }
-#endif
-    return true;
+    return version LLAMA_VERSIONS;
 }
 
 DLL_EXPORT LLModel *construct() {
diff --git a/gpt4all-backend/llmodel.cpp b/gpt4all-backend/llmodel.cpp
index 3563f2c5..7499a75b 100644
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -121,30 +121,20 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVaria
     if (!has_at_least_minimal_hardware())
         return nullptr;
 
+    //TODO: Auto-detect CUDA/OpenCL
+    if (buildVariant == "auto") {
+        if (requires_avxonly()) {
"avxonly"; + } else { + buildVariant = "default"; + } + } // Read magic std::ifstream f(modelPath, std::ios::binary); if (!f) return nullptr; // Get correct implementation - const LLModel::Implementation* impl = nullptr; - - #if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs - if (buildVariant == "auto") { - impl = implementation(f, "metal"); - } - #endif - - if (!impl) { - //TODO: Auto-detect CUDA/OpenCL - if (buildVariant == "auto") { - if (requires_avxonly()) { - buildVariant = "avxonly"; - } else { - buildVariant = "default"; - } - } - impl = implementation(f, buildVariant); - if (!impl) return nullptr; - } + auto impl = implementation(f, buildVariant); + if (!impl) return nullptr; f.close(); // Construct and return llmodel implementation return impl->construct(); diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index 17b0f46f..a5266fa9 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -58,11 +58,6 @@ set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) add_subdirectory(../gpt4all-backend llmodel) -set(METAL_SHADER_FILE) -if(${CMAKE_SYSTEM_NAME} MATCHES Darwin) - set(METAL_SHADER_FILE ../gpt4all-backend/llama.cpp-mainline/ggml-metal.metal) -endif() - qt_add_executable(chat main.cpp chat.h chat.cpp @@ -77,7 +72,6 @@ qt_add_executable(chat server.h server.cpp logger.h logger.cpp sysinfo.h - ${METAL_SHADER_FILE} ) qt_add_qml_module(chat @@ -138,12 +132,6 @@ if(${CMAKE_SYSTEM_NAME} MATCHES Darwin) ) endif() -if(METAL_SHADER_FILE) - set_target_properties(chat PROPERTIES - RESOURCE ${METAL_SHADER_FILE} - ) -endif() - target_compile_definitions(chat PRIVATE $<$,$>:QT_QML_DEBUG>) target_link_libraries(chat