From 4b474786267375a3d8d6c76cde72459beebb153a Mon Sep 17 00:00:00 2001
From: Adam Treat
Date: Tue, 25 Apr 2023 19:16:45 -0400
Subject: [PATCH] Move the backend code into its own subdirectory and make it
 a shared library. Begin fleshing out the C API wrapper that bindings can use.

---
 .gitmodules                              |   2 +-
 CMakeLists.txt                           |  22 +----
 cmake/deploy-qt-mac.cmake.in             |   2 +
 cmake/deploy-qt-windows.cmake.in         |   2 +
 llm.h                                    |   4 +-
 llmodel/CMakeLists.txt                   |  55 +++++++++++
 gptj.cpp => llmodel/gptj.cpp             |   0
 gptj.h => llmodel/gptj.h                 |   0
 llama.cpp => llmodel/llama.cpp           |   0
 llamamodel.cpp => llmodel/llamamodel.cpp |   0
 llamamodel.h => llmodel/llamamodel.h     |   0
 llmodel.h => llmodel/llmodel.h           |   0
 llmodel/llmodel_c.h                      | 121 +++++++++++++++++++++++
 utils.cpp => llmodel/utils.cpp           |   0
 utils.h => llmodel/utils.h               |   0
 15 files changed, 188 insertions(+), 20 deletions(-)
 create mode 100644 llmodel/CMakeLists.txt
 rename gptj.cpp => llmodel/gptj.cpp (100%)
 rename gptj.h => llmodel/gptj.h (100%)
 rename llama.cpp => llmodel/llama.cpp (100%)
 rename llamamodel.cpp => llmodel/llamamodel.cpp (100%)
 rename llamamodel.h => llmodel/llamamodel.h (100%)
 rename llmodel.h => llmodel/llmodel.h (100%)
 create mode 100644 llmodel/llmodel_c.h
 rename utils.cpp => llmodel/utils.cpp (100%)
 rename utils.h => llmodel/utils.h (100%)

diff --git a/.gitmodules b/.gitmodules
index 6e77e7f5..98f7d4d7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "llama.cpp"]
-	path = llama.cpp
+	path = llmodel/llama.cpp
 	url = https://github.com/manyoso/llama.cpp.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ab977daf..7fdaf57c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,6 +32,8 @@ set(CMAKE_AUTOMOC ON)
 set(CMAKE_AUTORCC ON)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
+option(GPT4ALL_AVX_ONLY "Build for AVX only" OFF)
+
 find_package(Qt6 6.2 COMPONENTS Core Quick QuickDialogs2 Svg REQUIRED)
 
 # Get the Qt6Core target properties
@@ -48,28 +50,13 @@ get_filename_component(Qt6_ROOT_DIR "${Qt6_ROOT_DIR}/.." ABSOLUTE)
 message(STATUS "qmake binary: ${QMAKE_EXECUTABLE}")
 message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")
 
-set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
-set(BUILD_SHARED_LIBS ON FORCE)
-
-set(CMAKE_VERBOSE_MAKEFILE ON)
-option(GPT4ALL_AVX_ONLY OFF "Build for avx only")
-option(GPT4ALL_LOCALHOST OFF "Build for local install repo")
-if (GPT4ALL_AVX_ONLY)
-    set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
-endif()
-
-add_subdirectory(llama.cpp)
+add_subdirectory(llmodel)
 
 qt_add_executable(chat
     main.cpp
     download.h download.cpp
     network.h network.cpp
-    gptj.h gptj.cpp
-    llamamodel.h llamamodel.cpp
-    llama.cpp/examples/common.cpp
     llm.h llm.cpp
-    llmodel.h
-    utils.h utils.cpp
 )
 
 qt_add_qml_module(chat
@@ -123,7 +110,7 @@ target_compile_definitions(chat
 target_link_libraries(chat
     PRIVATE Qt6::Quick Qt6::Svg)
 target_link_libraries(chat
-    PRIVATE llama)
+    PRIVATE llmodel)
 
 set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
 set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
@@ -134,6 +121,7 @@ if(NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_HOST_SYSTEM_PROCESSOR
 endif()
 
 install(TARGETS chat DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN})
+install(TARGETS llmodel DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
 install(TARGETS llama DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
 
 set(CPACK_GENERATOR "IFW")
diff --git a/cmake/deploy-qt-mac.cmake.in b/cmake/deploy-qt-mac.cmake.in
index faa16125..a7eeba65 100644
--- a/cmake/deploy-qt-mac.cmake.in
+++ b/cmake/deploy-qt-mac.cmake.in
@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
 execute_process(COMMAND ${MACDEPLOYQT} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -verbose=2)
 file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dylib
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
+file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dylib
+    DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/favicon.icns"
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Resources)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"
diff --git a/cmake/deploy-qt-windows.cmake.in b/cmake/deploy-qt-windows.cmake.in
index 3dec0149..b9f7428c 100644
--- a/cmake/deploy-qt-windows.cmake.in
+++ b/cmake/deploy-qt-windows.cmake.in
@@ -4,6 +4,8 @@ set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
 execute_process(COMMAND ${WINDEPLOYQT} --qmldir ${CMAKE_CURRENT_SOURCE_DIR} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
 file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama.dll
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
+file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.dll
+    DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-16.png"
     DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data)
 file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/logo-32.png"
diff --git a/llm.h b/llm.h
index 42028c9b..73024b35 100644
--- a/llm.h
+++ b/llm.h
@@ -3,8 +3,8 @@
 
 #include <QObject>
 #include <QThread>
-#include "gptj.h"
-#include "llamamodel.h"
+#include "llmodel/gptj.h"
+#include "llmodel/llamamodel.h"
 
 class LLMObject : public QObject
 {
diff --git a/llmodel/CMakeLists.txt b/llmodel/CMakeLists.txt
new file mode 100644
index 00000000..50bd5509
--- /dev/null
+++ b/llmodel/CMakeLists.txt
@@ -0,0 +1,55 @@
+cmake_minimum_required(VERSION 3.16)
+
+if(APPLE)
+    option(BUILD_UNIVERSAL "Build a Universal binary on macOS" OFF)
+    if(BUILD_UNIVERSAL)
+        # Build a Universal binary on macOS
+        # This requires that the found Qt library is compiled as Universal binaries.
+        set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
+    else()
+        # Build for the host architecture on macOS
+        set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
+    endif()
+endif()
+
+set(APP_VERSION_MAJOR 2)
+set(APP_VERSION_MINOR 2)
+set(APP_VERSION_PATCH 2)
+set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
+
+# Generate a header file with the version number
+configure_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/config.h.in"
+    "${CMAKE_CURRENT_BINARY_DIR}/../config.h"
+)
+
+# Include the binary directory for the generated header file
+include_directories("${CMAKE_CURRENT_BINARY_DIR}")
+
+project(llmodel VERSION ${APP_VERSION} LANGUAGES CXX C)
+
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
+set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
+
+set(CMAKE_VERBOSE_MAKEFILE ON)
+if (GPT4ALL_AVX_ONLY)
+    set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
+endif()
+
+add_subdirectory(llama.cpp)
+
+add_library(llmodel
+    gptj.h gptj.cpp
+    llamamodel.h llamamodel.cpp
+    llama.cpp/examples/common.cpp
+    llmodel.h llmodel_c.h
+    utils.h utils.cpp
+)
+
+target_link_libraries(llmodel
+    PRIVATE llama)
+
+set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
+set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
diff --git a/gptj.cpp b/llmodel/gptj.cpp
similarity index 100%
rename from gptj.cpp
rename to llmodel/gptj.cpp
diff --git a/gptj.h b/llmodel/gptj.h
similarity index 100%
rename from gptj.h
rename to llmodel/gptj.h
diff --git a/llama.cpp b/llmodel/llama.cpp
similarity index 100%
rename from llama.cpp
rename to llmodel/llama.cpp
diff --git a/llamamodel.cpp b/llmodel/llamamodel.cpp
similarity index 100%
rename from llamamodel.cpp
rename to llmodel/llamamodel.cpp
diff --git a/llamamodel.h b/llmodel/llamamodel.h
similarity index 100%
rename from llamamodel.h
rename to llmodel/llamamodel.h
diff --git a/llmodel.h b/llmodel/llmodel.h
similarity index 100%
rename from llmodel.h
rename to llmodel/llmodel.h
diff --git a/llmodel/llmodel_c.h b/llmodel/llmodel_c.h
new file mode 100644
index 00000000..e68cf045
--- /dev/null
+++ b/llmodel/llmodel_c.h
@@ -0,0 +1,121 @@
+#ifndef LLMODEL_C_H
+#define LLMODEL_C_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Opaque pointers to the underlying C++ classes.
+ */
+typedef void *LLMODEL_C;
+typedef void *GPTJ_C;
+typedef void *LLAMA_C;
+
+/**
+ * PromptContext_C structure for holding the prompt context.
+ */
+typedef struct {
+    float *logits;          // logits of current context
+    int32_t *tokens;        // current tokens in the context window
+    int32_t n_past;         // number of tokens in past conversation
+    int32_t n_ctx;          // number of tokens possible in context window
+    int32_t n_predict;      // number of tokens to predict
+    int32_t top_k;          // top k logits to sample from
+    float top_p;            // nucleus sampling probability threshold
+    float temp;             // temperature to adjust model's output distribution
+    int32_t n_batch;        // number of predictions to generate in parallel
+    float repeat_penalty;   // penalty factor for repeated tokens
+    int32_t repeat_last_n;  // last n tokens to penalize
+    float contextErase;     // percent of context to erase if we exceed the context window
+} PromptContext_C;
+
+/**
+ * Callback types for response and recalculation.
+ */
+typedef bool (*ResponseCallback)(int32_t token_id, const char *response);
+typedef bool (*RecalculateCallback)(bool is_recalculating);
+
+/**
+ * Create a GPTJ instance.
+ * @return A pointer to the GPTJ instance.
+ */
+GPTJ_C GPTJ_create(void);
+
+/**
+ * Destroy a GPTJ instance.
+ * @param gptj A pointer to the GPTJ instance.
+ */
+void GPTJ_destroy(GPTJ_C gptj);
+
+/**
+ * Create a LLAMA instance.
+ * @return A pointer to the LLAMA instance.
+ */
+LLAMA_C LLAMA_create(void);
+
+/**
+ * Destroy a LLAMA instance.
+ * @param llama A pointer to the LLAMA instance.
+ */
+void LLAMA_destroy(LLAMA_C llama);
+
+/**
+ * Load a model from a file.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param modelPath A string representing the path to the model file.
+ * @return true if the model was loaded successfully, false otherwise.
+ */
+bool LLMODEL_loadModel(LLMODEL_C model, const char *modelPath);
+
+/**
+ * Load a model from an input stream.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param modelPath A string representing the path to the model file.
+ * @param fin A pointer to the input stream.
+ * @return true if the model was loaded successfully, false otherwise.
+ */
+bool LLMODEL_loadModelStream(LLMODEL_C model, const char *modelPath, void *fin);
+
+/**
+ * Check if a model is loaded.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @return true if the model is loaded, false otherwise.
+ */
+bool LLMODEL_isModelLoaded(LLMODEL_C model);
+
+/**
+ * Generate a response using the model.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param prompt A string representing the input prompt.
+ * @param response A callback function for handling the generated response.
+ * @param recalculate A callback function for handling recalculation requests.
+ * @param ctx A pointer to the PromptContext_C structure.
+ */
+void LLMODEL_prompt(LLMODEL_C model, const char *prompt,
+                    ResponseCallback response,
+                    RecalculateCallback recalculate,
+                    PromptContext_C *ctx);
+
+/**
+ * Set the number of threads to be used by the model.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @param n_threads The number of threads to be used.
+ */
+void LLMODEL_setThreadCount(LLMODEL_C model, int32_t n_threads);
+
+/**
+ * Get the number of threads currently being used by the model.
+ * @param model A pointer to the LLMODEL_C instance.
+ * @return The number of threads currently being used.
+ */
+int32_t LLMODEL_threadCount(LLMODEL_C model);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // LLMODEL_C_H
diff --git a/utils.cpp b/llmodel/utils.cpp
similarity index 100%
rename from utils.cpp
rename to llmodel/utils.cpp
diff --git a/utils.h b/llmodel/utils.h
similarity index 100%
rename from utils.h
rename to llmodel/utils.h
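
For illustration, here is a minimal sketch of how a binding might drive the new C API end to end. It is not part of the patch, and it rests on assumptions the header does not yet pin down: that the response callback returns true to keep generating and false to cancel, that the recalculate callback's bool reports whether the context is being recalculated, that the logits/tokens buffers are managed by the library (so a zeroed context is valid), and that a GPTJ_C handle can be passed wherever LLMODEL_C is expected (all three handle types are the same void *). The model filename, sampling values, and link line are hypothetical.

/* example_binding.c — hypothetical usage sketch.
   Assumed build: cc example_binding.c -lllmodel */
#include <stdio.h>
#include <string.h>
#include "llmodel_c.h"

/* Assumed semantics: return true to keep generating, false to cancel. */
static bool on_response(int32_t token_id, const char *response)
{
    (void)token_id;
    fputs(response, stdout);  /* stream each generated chunk as it arrives */
    fflush(stdout);
    return true;
}

/* Assumed semantics: the argument reports whether recalculation is in progress. */
static bool on_recalculate(bool is_recalculating)
{
    return is_recalculating;
}

int main(void)
{
    GPTJ_C model = GPTJ_create();

    /* Hypothetical model path. */
    if (!LLMODEL_loadModel(model, "ggml-gpt4all-j.bin")) {
        fprintf(stderr, "failed to load model\n");
        GPTJ_destroy(model);
        return 1;
    }

    LLMODEL_setThreadCount(model, 4);

    /* Zero the context, then fill in the sampling knobs; logits/tokens
       are assumed to be owned by the library, so they start out NULL. */
    PromptContext_C ctx;
    memset(&ctx, 0, sizeof ctx);
    ctx.n_ctx = 2048;
    ctx.n_predict = 128;
    ctx.top_k = 40;
    ctx.top_p = 0.95f;
    ctx.temp = 0.8f;
    ctx.n_batch = 8;
    ctx.repeat_penalty = 1.1f;
    ctx.repeat_last_n = 64;
    ctx.contextErase = 0.5f;

    LLMODEL_prompt(model, "Why is the sky blue?",
                   on_response, on_recalculate, &ctx);

    GPTJ_destroy(model);
    return 0;
}

Because every handle is an opaque void * behind a flat extern "C" surface, a binding only needs a plain C FFI (Python ctypes, Node FFI, and the like) rather than a C++ ABI, which is presumably the point of wrapping the C++ LLModel classes this way.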