Dlopen backend 5 (#779)

Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squash-merged from dlopen_backend_5, where the history is preserved.
2025-09-06 11:00:48 +00:00 · 2023-05-31 17:04:01 -04:00
parent f4a1f7340c
commit 48275d0dcc
22 changed files with 993 additions and 327 deletions
--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/llmodel.h
@@ -1,21 +1,23 @@
 #ifndef LLMODEL_H
 #define LLMODEL_H
-
 #include <string>
 #include <functional>
 #include <vector>
 #include <cstdint>

+
 class LLModel {
 public:
    explicit LLModel() {}
    virtual ~LLModel() {}

+    static LLModel *construct(const std::string &modelPath, std::string buildVariant = "default");
+
    virtual bool loadModel(const std::string &modelPath) = 0;
    virtual bool isModelLoaded() const = 0;
    virtual size_t stateSize() const { return 0; }
-    virtual size_t saveState(uint8_t *dest) const { return 0; }
-    virtual size_t restoreState(const uint8_t *src) { return 0; }
+    virtual size_t saveState(uint8_t */*dest*/) const { return 0; }
+    virtual size_t restoreState(const uint8_t */*src*/) { return 0; }
    struct PromptContext {
        std::vector<float> logits;      // logits of current context
        std::vector<int32_t> tokens;    // current tokens in the context window
@@ -36,12 +38,18 @@ public:
        std::function<bool(int32_t, const std::string&)> responseCallback,
        std::function<bool(bool)> recalculateCallback,
        PromptContext &ctx) = 0;
-    virtual void setThreadCount(int32_t n_threads) {}
+    virtual void setThreadCount(int32_t /*n_threads*/) {}
    virtual int32_t threadCount() const { return 1; }

+    const char *getModelType() const {
+        return modelType;
+    }
+
 protected:
    virtual void recalculateContext(PromptContext &promptCtx,
        std::function<bool(bool)> recalculate) = 0;
+
+    const char *modelType;
 };

 #endif // LLMODEL_H