Mirror of https://github.com/nomic-ai/gpt4all.git

Commit 4b47478626 (parent 52c4215b11)

Move the backend code into its own subdirectory and make it a shared library. Begin fleshing out the C API wrapper that bindings can use.
.gitmodules (vendored, 2 lines changed)

@@ -1,3 +1,3 @@
 [submodule "llama.cpp"]
-    path = llama.cpp
+    path = llmodel/llama.cpp
     url = https://github.com/manyoso/llama.cpp.git
CMakeLists.txt (changed)

@@ -32,6 +32,8 @@ set(CMAKE_AUTOMOC ON)
 set(CMAKE_AUTORCC ON)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
+option(GPT4ALL_AVX_ONLY OFF "Build for avx only")
+
 find_package(Qt6 6.2 COMPONENTS Core Quick QuickDialogs2 Svg REQUIRED)
 
 # Get the Qt6Core target properties
@@ -48,28 +50,13 @@ get_filename_component(Qt6_ROOT_DIR "${Qt6_ROOT_DIR}/.." ABSOLUTE)
 message(STATUS "qmake binary: ${QMAKE_EXECUTABLE}")
 message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")
 
-set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
-set(BUILD_SHARED_LIBS ON FORCE)
-
-set(CMAKE_VERBOSE_MAKEFILE ON)
-option(GPT4ALL_AVX_ONLY OFF "Build for avx only")
-option(GPT4ALL_LOCALHOST OFF "Build for local install repo")
-if (GPT4ALL_AVX_ONLY)
-    set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
-endif()
-
-add_subdirectory(llama.cpp)
+add_subdirectory(llmodel)
 
 qt_add_executable(chat
     main.cpp
     download.h download.cpp
     network.h network.cpp
-    gptj.h gptj.cpp
-    llamamodel.h llamamodel.cpp
-    llama.cpp/examples/common.cpp
     llm.h llm.cpp
-    llmodel.h
-    utils.h utils.cpp
 )
 
 qt_add_qml_module(chat
@@ -123,7 +110,7 @@ target_compile_definitions(chat
 target_link_libraries(chat
     PRIVATE Qt6::Quick Qt6::Svg)
 target_link_libraries(chat
-    PRIVATE llama)
+    PRIVATE llmodel)
 
 set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
 set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
@@ -134,6 +121,7 @@ if(NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_HOST_SYSTEM_PROCESSOR
 endif()
 
 install(TARGETS chat DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN})
+install(TARGETS llmodel DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
 install(TARGETS llama DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
 
 set(CPACK_GENERATOR "IFW")
cmake/deploy-qt-mac.cmake.in (macOS deploy script, changed)

@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
 execute_process(COMMAND ${MACDEPLOYQT} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -verbose=2)
 file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dylib
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
+file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dylib
+    DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/favicon.icns"
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Resources)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"
cmake/deploy-qt-windows.cmake.in (Windows deploy script, changed)

@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
 execute_process(COMMAND ${WINDEPLOYQT} --qmldir ${CMAKE_CURRENT_SOURCE_DIR} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
 file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dll
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
+file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dll
+    DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-32.png"
llm.h (4 lines changed)

@@ -3,8 +3,8 @@
 
 #include <QObject>
 #include <QThread>
-#include "gptj.h"
-#include "llamamodel.h"
+#include "llmodel/gptj.h"
+#include "llmodel/llamamodel.h"
 
 class LLMObject : public QObject
 {
llmodel/CMakeLists.txt (new file, 55 lines)

@@ -0,0 +1,55 @@
+cmake_minimum_required(VERSION 3.16)
+
+if(APPLE)
+    option(BUILD_UNIVERSAL "Build a Universal binary on macOS" OFF)
+    if(BUILD_UNIVERSAL)
+        # Build a Universal binary on macOS
+        # This requires that the found Qt library is compiled as Universal binaries.
+        set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
+    else()
+        # Build for the host architecture on macOS
+        set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
+    endif()
+endif()
+
+set(APP_VERSION_MAJOR 2)
+set(APP_VERSION_MINOR 2)
+set(APP_VERSION_PATCH 2)
+set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
+
+# Generate a header file with the version number
+configure_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/config.h.in"
+    "${CMAKE_CURRENT_BINARY_DIR}/../config.h"
+)
+
+# Include the binary directory for the generated header file
+include_directories("${CMAKE_CURRENT_BINARY_DIR}")
+
+project(llmodel VERSION ${APP_VERSION} LANGUAGES CXX C)
+
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
+set(BUILD_SHARED_LIBS ON FORCE)
+
+set(CMAKE_VERBOSE_MAKEFILE ON)
+if (GPT4ALL_AVX_ONLY)
+    set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
+endif()
+
+add_subdirectory(llama.cpp)
+
+add_library(llmodel
+    gptj.h gptj.cpp
+    llamamodel.h llamamodel.cpp
+    llama.cpp/examples/common.cpp
+    llmodel.h llmodel_c.h
+    utils.h utils.cpp
+)
+
+target_link_libraries(llmodel
+    PRIVATE llama)
+
+set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
+set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
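Since llmodel is built as a shared library with a plain C surface, a language binding can load it at runtime without any of the C++ headers. The following is a minimal sketch and not part of the commit: the library file name (libllmodel.so here; libllmodel.dylib on macOS, as the deploy scripts above show) and the dlopen approach are assumptions, while the symbol names come from llmodel/llmodel_c.h below.

#include <stdio.h>
#include <dlfcn.h>

typedef void *GPTJ_C;  /* mirrors the opaque handle typedef in llmodel_c.h */

int main(void)
{
    /* "libllmodel.so" is an assumption; the suffix differs per platform. */
    void *lib = dlopen("libllmodel.so", RTLD_NOW);
    if (!lib) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
    }

    /* Resolve two of the C entry points declared in llmodel_c.h. */
    GPTJ_C (*create)(void) = (GPTJ_C (*)(void))dlsym(lib, "GPTJ_create");
    void (*destroy)(GPTJ_C) = (void (*)(GPTJ_C))dlsym(lib, "GPTJ_destroy");

    if (create && destroy) {
        GPTJ_C gptj = create();  /* construct a model instance... */
        destroy(gptj);           /* ...and tear it down again */
    }

    dlclose(lib);
    return 0;
}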
llmodel/llmodel_c.h (new file, 121 lines)

@@ -0,0 +1,121 @@
+#ifndef LLMODEL_C_H
+#define LLMODEL_C_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Opaque pointers to the underlying C++ classes.
+ */
+typedef void *LLMODEL_C;
+typedef void *GPTJ_C;
+typedef void *LLAMA_C;
+
+/**
+ * PromptContext_C structure for holding the prompt context.
+ */
+typedef struct {
+    float *logits;          // logits of current context
+    int32_t *tokens;        // current tokens in the context window
+    int32_t n_past;         // number of tokens in past conversation
+    int32_t n_ctx;          // number of tokens possible in context window
+    int32_t n_predict;      // number of tokens to predict
+    int32_t top_k;          // top k logits to sample from
+    float top_p;            // nucleus sampling probability threshold
+    float temp;             // temperature to adjust model's output distribution
+    int32_t n_batch;        // number of predictions to generate in parallel
+    float repeat_penalty;   // penalty factor for repeated tokens
+    int32_t repeat_last_n;  // last n tokens to penalize
+    float contextErase;     // percent of context to erase if we exceed the context window
+} PromptContext_C;
+
+/**
+ * Callback types for response and recalculation.
+ */
+typedef bool (*ResponseCallback)(int32_t, const char *);
+typedef bool (*RecalculateCallback)(bool);
+
+/**
+ * Create a GPTJ instance.
+ * @return A pointer to the GPTJ instance.
+ */
+GPTJ_C GPTJ_create();
+
+/**
+ * Destroy a GPTJ instance.
+ * @param gptj A pointer to the GPTJ instance.
+ */
+void GPTJ_destroy(GPTJ_C gptj);
+
+/**
+ * Create a LLAMA instance.
+ * @return A pointer to the LLAMA instance.
+ */
+LLAMA_C LLAMA_create();
+
+/**
+ * Destroy a LLAMA instance.
+ * @param llama A pointer to the LLAMA instance.
+ */
+void LLAMA_destroy(LLAMA_C llama);
+
+/**
+ * Load a model from a file.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param modelPath A string representing the path to the model file.
+ * @return true if the model was loaded successfully, false otherwise.
+ */
+bool LLMODEL_loadModel(LLMODEL_C model, const char *modelPath);
+
+/**
+ * Load a model from an input stream.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param modelPath A string representing the path to the model file.
+ * @param fin A pointer to the input stream.
+ * @return true if the model was loaded successfully, false otherwise.
+ */
+bool LLMODEL_loadModelStream(LLMODEL_C model, const char *modelPath, void *fin);
+
+/**
+ * Check if a model is loaded.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @return true if the model is loaded, false otherwise.
+ */
+bool LLMODEL_isModelLoaded(LLMODEL_C model);
+
+/**
+ * Generate a response using the model.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param prompt A string representing the input prompt.
+ * @param response A callback function for handling the generated response.
+ * @param recalculate A callback function for handling recalculation requests.
+ * @param ctx A pointer to the PromptContext_C structure.
+ */
+void LLMODEL_prompt(LLMODEL_C model, const char *prompt,
+                    ResponseCallback response,
+                    RecalculateCallback recalculate,
+                    PromptContext_C *ctx);
+
+/**
+ * Set the number of threads to be used by the model.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param n_threads The number of threads to be used.
+ */
+void LLMODEL_setThreadCount(LLMODEL_C model, int32_t n_threads);
+
+/**
+ * Get the number of threads currently being used by the model.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @return The number of threads currently being used.
+ */
+int32_t LLMODEL_threadCount(LLMODEL_C model);
+
+#ifdef __cplusplus
}
+#endif
+
+#endif // LLMODEL_C_H
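Putting the pieces together, a binding might drive this API as follows. This is a hedged sketch rather than code from the commit: it assumes a handle returned by GPTJ_create may be passed to the LLMODEL_* functions (all handle types are void * aliases), that a zero-initialized PromptContext_C is acceptable with the library managing the logits/tokens buffers, and that the model file name and sampling values are purely illustrative.

#include <stdio.h>
#include <string.h>
#include "llmodel_c.h"

/* ResponseCallback: receives each generated token id and its text;
 * returning false would stop generation early. */
static bool on_response(int32_t token_id, const char *text)
{
    (void)token_id;
    printf("%s", text);
    fflush(stdout);
    return true;
}

/* RecalculateCallback: signals context-window recalculation; returning
 * false would cancel it. */
static bool on_recalculate(bool is_recalculating)
{
    (void)is_recalculating;
    return true;
}

int main(void)
{
    GPTJ_C model = GPTJ_create();

    /* Hypothetical model file name. */
    if (!LLMODEL_loadModel(model, "ggml-gpt4all-j.bin") ||
        !LLMODEL_isModelLoaded(model)) {
        fprintf(stderr, "failed to load model\n");
        GPTJ_destroy(model);
        return 1;
    }

    LLMODEL_setThreadCount(model, 4);
    printf("using %d threads\n", (int)LLMODEL_threadCount(model));

    /* Zero-init, then fill in the sampling parameters documented in the
     * struct; these values are illustrative, not mandated by the header. */
    PromptContext_C ctx;
    memset(&ctx, 0, sizeof ctx);
    ctx.n_ctx = 2048;
    ctx.n_predict = 128;
    ctx.top_k = 40;
    ctx.top_p = 0.95f;
    ctx.temp = 0.7f;
    ctx.n_batch = 8;
    ctx.repeat_penalty = 1.1f;
    ctx.repeat_last_n = 64;
    ctx.contextErase = 0.5f;

    LLMODEL_prompt(model, "Why is the sky blue?", on_response, on_recalculate, &ctx);
    printf("\n");

    GPTJ_destroy(model);
    return 0;
}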