From 4b474786267375a3d8d6c76cde72459beebb153a Mon Sep 17 00:00:00 2001
From: Adam Treat
Date: Tue, 25 Apr 2023 19:16:45 -0400
Subject: [PATCH] Move the backend code into its own subdirectory and make it
 a shared library. Begin fleshing out the C API wrapper that bindings can use.

---
 .gitmodules                              |   2 +-
 CMakeLists.txt                           |  22 +----
 cmake/deploy-qt-mac.cmake.in             |   2 +
 cmake/deploy-qt-windows.cmake.in         |   2 +
 llm.h                                    |   4 +-
 llmodel/CMakeLists.txt                   |  55 +++++++++++
 gptj.cpp => llmodel/gptj.cpp             |   0
 gptj.h => llmodel/gptj.h                 |   0
 llama.cpp => llmodel/llama.cpp           |   0
 llamamodel.cpp => llmodel/llamamodel.cpp |   0
 llamamodel.h => llmodel/llamamodel.h     |   0
 llmodel.h => llmodel/llmodel.h           |   0
 llmodel/llmodel_c.h                      | 121 +++++++++++++++++++++++
 utils.cpp => llmodel/utils.cpp           |   0
 utils.h => llmodel/utils.h               |   0
 15 files changed, 188 insertions(+), 20 deletions(-)
 create mode 100644 llmodel/CMakeLists.txt
 rename gptj.cpp => llmodel/gptj.cpp (100%)
 rename gptj.h => llmodel/gptj.h (100%)
 rename llama.cpp => llmodel/llama.cpp (100%)
 rename llamamodel.cpp => llmodel/llamamodel.cpp (100%)
 rename llamamodel.h => llmodel/llamamodel.h (100%)
 rename llmodel.h => llmodel/llmodel.h (100%)
 create mode 100644 llmodel/llmodel_c.h
 rename utils.cpp => llmodel/utils.cpp (100%)
 rename utils.h => llmodel/utils.h (100%)

diff --git a/.gitmodules b/.gitmodules
index 6e77e7f5..98f7d4d7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "llama.cpp"]
-	path = llama.cpp
+	path = llmodel/llama.cpp
 	url = https://github.com/manyoso/llama.cpp.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ab977daf..7fdaf57c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,6 +32,8 @@ set(CMAKE_AUTOMOC ON)
 set(CMAKE_AUTORCC ON)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
+option(GPT4ALL_AVX_ONLY "Build for AVX only" OFF)
+
 find_package(Qt6 6.2 COMPONENTS Core Quick QuickDialogs2 Svg REQUIRED)
 
 # Get the Qt6Core target properties
@@ -48,28 +50,13 @@ get_filename_component(Qt6_ROOT_DIR "${Qt6_ROOT_DIR}/.." ABSOLUTE)
 message(STATUS "qmake binary: ${QMAKE_EXECUTABLE}")
 message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")
 
-set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
-set(BUILD_SHARED_LIBS ON FORCE)
-
-set(CMAKE_VERBOSE_MAKEFILE ON)
-option(GPT4ALL_AVX_ONLY OFF "Build for avx only")
-option(GPT4ALL_LOCALHOST OFF "Build for local install repo")
-if (GPT4ALL_AVX_ONLY)
-    set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
-endif()
-
-add_subdirectory(llama.cpp)
+add_subdirectory(llmodel)
 
 qt_add_executable(chat
     main.cpp
     download.h download.cpp
     network.h network.cpp
-    gptj.h gptj.cpp
-    llamamodel.h llamamodel.cpp
-    llama.cpp/examples/common.cpp
     llm.h llm.cpp
-    llmodel.h
-    utils.h utils.cpp
 )
 
 qt_add_qml_module(chat
@@ -123,7 +110,7 @@ target_compile_definitions(chat
 target_link_libraries(chat
     PRIVATE Qt6::Quick Qt6::Svg)
 target_link_libraries(chat
-    PRIVATE llama)
+    PRIVATE llmodel)
 
 set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
 set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
@@ -134,6 +121,7 @@ if(NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_HOST_SYSTEM_PROCESSOR
 endif()
 
 install(TARGETS chat DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN})
+install(TARGETS llmodel DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
 install(TARGETS llama DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
 
 set(CPACK_GENERATOR "IFW")
diff --git a/cmake/deploy-qt-mac.cmake.in b/cmake/deploy-qt-mac.cmake.in
index faa16125..a7eeba65 100644
--- a/cmake/deploy-qt-mac.cmake.in
+++ b/cmake/deploy-qt-mac.cmake.in
@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
 execute_process(COMMAND ${MACDEPLOYQT} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -verbose=2)
 file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dylib
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
+file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dylib
+    DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/favicon.icns"
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Resources)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"
diff --git a/cmake/deploy-qt-windows.cmake.in b/cmake/deploy-qt-windows.cmake.in
index 3dec0149..b9f7428c 100644
--- a/cmake/deploy-qt-windows.cmake.in
+++ b/cmake/deploy-qt-windows.cmake.in
@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
 execute_process(COMMAND ${WINDEPLOYQT} --qmldir ${CMAKE_CURRENT_SOURCE_DIR} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
 file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dll
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
+file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dll
+    DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-32.png"
diff --git a/llm.h b/llm.h
index 42028c9b..73024b35 100644
--- a/llm.h
+++ b/llm.h
@@ -3,8 +3,8 @@
 
 #include <QObject>
 #include <QThread>
-#include "gptj.h"
-#include "llamamodel.h"
+#include "llmodel/gptj.h"
+#include "llmodel/llamamodel.h"
 
 class LLMObject : public QObject
 {
diff --git a/llmodel/CMakeLists.txt b/llmodel/CMakeLists.txt
new file mode 100644
index 00000000..50bd5509
--- /dev/null
+++ b/llmodel/CMakeLists.txt
@@ -0,0 +1,55 @@
+cmake_minimum_required(VERSION 3.16)
+
+if(APPLE)
+    option(BUILD_UNIVERSAL "Build a Universal binary on macOS" OFF)
+    if(BUILD_UNIVERSAL)
+        # Build a Universal binary on macOS
+        # This requires that the found Qt library is compiled as Universal binaries.
+        set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
+    else()
+        # Build for the host architecture on macOS
+        set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
+    endif()
+endif()
+
+set(APP_VERSION_MAJOR 2)
+set(APP_VERSION_MINOR 2)
+set(APP_VERSION_PATCH 2)
+set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
+
+# Generate a header file with the version number
+configure_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/config.h.in"
+    "${CMAKE_CURRENT_BINARY_DIR}/../config.h"
+)
+
+# Include the binary directory for the generated header file
+include_directories("${CMAKE_CURRENT_BINARY_DIR}")
+
+project(llmodel VERSION ${APP_VERSION} LANGUAGES CXX C)
+
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
+set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
+
+set(CMAKE_VERBOSE_MAKEFILE ON)
+if (GPT4ALL_AVX_ONLY)
+    set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
+endif()
+
+add_subdirectory(llama.cpp)
+
+add_library(llmodel
+    gptj.h gptj.cpp
+    llamamodel.h llamamodel.cpp
+    llama.cpp/examples/common.cpp
+    llmodel.h llmodel_c.h
+    utils.h utils.cpp
+)
+
+target_link_libraries(llmodel
+    PRIVATE llama)
+
+set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
+set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
diff --git a/gptj.cpp b/llmodel/gptj.cpp
similarity index 100%
rename from gptj.cpp
rename to llmodel/gptj.cpp
diff --git a/gptj.h b/llmodel/gptj.h
similarity index 100%
rename from gptj.h
rename to llmodel/gptj.h
diff --git a/llama.cpp b/llmodel/llama.cpp
similarity index 100%
rename from llama.cpp
rename to llmodel/llama.cpp
diff --git a/llamamodel.cpp b/llmodel/llamamodel.cpp
similarity index 100%
rename from llamamodel.cpp
rename to llmodel/llamamodel.cpp
diff --git a/llamamodel.h b/llmodel/llamamodel.h
similarity index 100%
rename from llamamodel.h
rename to llmodel/llamamodel.h
diff --git a/llmodel.h b/llmodel/llmodel.h
similarity index 100%
rename from llmodel.h
rename to llmodel/llmodel.h
diff --git a/llmodel/llmodel_c.h b/llmodel/llmodel_c.h
new file mode 100644
index 00000000..e68cf045
--- /dev/null
+++ b/llmodel/llmodel_c.h
@@ -0,0 +1,121 @@
+#ifndef LLMODEL_C_H
+#define LLMODEL_C_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Opaque pointers to the underlying C++ classes.
+ */
+typedef void *LLMODEL_C;
+typedef void *GPTJ_C;
+typedef void *LLAMA_C;
+
+/**
+ * PromptContext_C structure for holding the prompt context.
+ */
+typedef struct {
+    float *logits;          // logits of current context
+    int32_t *tokens;        // current tokens in the context window
+    int32_t n_past;         // number of tokens in past conversation
+    int32_t n_ctx;          // number of tokens possible in context window
+    int32_t n_predict;      // number of tokens to predict
+    int32_t top_k;          // top k logits to sample from
+    float top_p;            // nucleus sampling probability threshold
+    float temp;             // temperature to adjust model's output distribution
+    int32_t n_batch;        // number of predictions to generate in parallel
+    float repeat_penalty;   // penalty factor for repeated tokens
+    int32_t repeat_last_n;  // last n tokens to penalize
+    float contextErase;     // percent of context to erase if we exceed the context window
+} PromptContext_C;
+
+/**
+ * Callback types for response and recalculation.
+ */
+typedef bool (*ResponseCallback)(int32_t token_id, const char *response);
+typedef bool (*RecalculateCallback)(bool is_recalculating);
+
+/**
+ * Create a GPTJ instance.
+ * @return A pointer to the GPTJ instance.
+ */
+GPTJ_C GPTJ_create(void);
+
+/**
+ * Destroy a GPTJ instance.
+ * @param gptj A pointer to the GPTJ instance.
+ */
+void GPTJ_destroy(GPTJ_C gptj);
+
+/**
+ * Create a LLAMA instance.
+ * @return A pointer to the LLAMA instance.
+ */
+LLAMA_C LLAMA_create(void);
+
+/**
+ * Destroy a LLAMA instance.
+ * @param llama A pointer to the LLAMA instance.
+ */
+void LLAMA_destroy(LLAMA_C llama);
+
+/**
+ * Load a model from a file.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param modelPath A string representing the path to the model file.
+ * @return true if the model was loaded successfully, false otherwise.
+ */
+bool LLMODEL_loadModel(LLMODEL_C model, const char *modelPath);
+
+/**
+ * Load a model from an input stream.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param modelPath A string representing the path to the model file.
+ * @param fin A pointer to the input stream.
+ * @return true if the model was loaded successfully, false otherwise.
+ */
+bool LLMODEL_loadModelStream(LLMODEL_C model, const char *modelPath, void *fin);
+
+/**
+ * Check if a model is loaded.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @return true if the model is loaded, false otherwise.
+ */
+bool LLMODEL_isModelLoaded(LLMODEL_C model);
+
+/**
+ * Generate a response using the model.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param prompt A string representing the input prompt.
+ * @param response A callback function for handling the generated response.
+ * @param recalculate A callback function for handling recalculation requests.
+ * @param ctx A pointer to the PromptContext_C structure.
+ */
+void LLMODEL_prompt(LLMODEL_C model, const char *prompt,
+                    ResponseCallback response,
+                    RecalculateCallback recalculate,
+                    PromptContext_C *ctx);
+
+/**
+ * Set the number of threads to be used by the model.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param n_threads The number of threads to be used.
+ */
+void LLMODEL_setThreadCount(LLMODEL_C model, int32_t n_threads);
+
+/**
+ * Get the number of threads currently being used by the model.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @return The number of threads currently being used.
+ */
+int32_t LLMODEL_threadCount(LLMODEL_C model);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // LLMODEL_C_H
diff --git a/utils.cpp b/llmodel/utils.cpp
similarity index 100%
rename from utils.cpp
rename to llmodel/utils.cpp
diff --git a/utils.h b/llmodel/utils.h
similarity index 100%
rename from utils.h
rename to llmodel/utils.h
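
For illustration, here is a minimal sketch of how a binding might drive the new C API end to end. It is not part of the patch, and it rests on assumptions the header does not yet pin down: that the response callback returns true to keep generating and false to cancel, that the recalculate callback's bool reports whether the context is being recalculated, that the logits/tokens buffers are managed by the library (so a zeroed context is valid), and that a GPTJ_C handle can be passed wherever LLMODEL_C is expected (all three handle types are the same void *). The model filename, sampling values, and link line are hypothetical.

/* example_binding.c — hypothetical usage sketch.
   Assumed build: cc example_binding.c -lllmodel */
#include <stdio.h>
#include <string.h>
#include "llmodel_c.h"

/* Assumed semantics: return true to keep generating, false to cancel. */
static bool on_response(int32_t token_id, const char *response)
{
    (void)token_id;
    fputs(response, stdout);  /* stream each generated chunk as it arrives */
    fflush(stdout);
    return true;
}

/* Assumed semantics: the argument reports whether recalculation is in progress. */
static bool on_recalculate(bool is_recalculating)
{
    return is_recalculating;
}

int main(void)
{
    GPTJ_C model = GPTJ_create();

    /* Hypothetical model path. */
    if (!LLMODEL_loadModel(model, "ggml-gpt4all-j.bin")) {
        fprintf(stderr, "failed to load model\n");
        GPTJ_destroy(model);
        return 1;
    }

    LLMODEL_setThreadCount(model, 4);

    /* Zero the context, then fill in the sampling knobs; logits/tokens
       are assumed to be owned by the library, so they start out NULL. */
    PromptContext_C ctx;
    memset(&ctx, 0, sizeof ctx);
    ctx.n_ctx = 2048;
    ctx.n_predict = 128;
    ctx.top_k = 40;
    ctx.top_p = 0.95f;
    ctx.temp = 0.8f;
    ctx.n_batch = 8;
    ctx.repeat_penalty = 1.1f;
    ctx.repeat_last_n = 64;
    ctx.contextErase = 0.5f;

    LLMODEL_prompt(model, "Why is the sky blue?",
                   on_response, on_recalculate, &ctx);

    GPTJ_destroy(model);
    return 0;
}

Because every handle is an opaque void * behind a flat extern "C" surface, a binding only needs a plain C FFI (Python ctypes, Node FFI, and the like) rather than a C++ ABI, which is presumably the point of wrapping the C++ LLModel classes this way.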