Trying to shrink the copy+paste code and share more code between the backend model implementations.

2025-11-12 13:08:54 +00:00 · 2023-06-01 23:15:58 -04:00
parent 031d7149a7
commit a41bd6ac0a
9 changed files with 41 additions and 96 deletions
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -1,6 +1,7 @@
 #include "llmodel.h"
 #include "dlhandle.h"

+#include <iostream>
 #include <string>
 #include <vector>
 #include <fstream>
@@ -95,6 +96,28 @@ const LLModel::Implementation* LLModel::implementation(std::ifstream& f, const s
    return nullptr;
 }

+void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {  // Re-evaluates every token in promptCtx.tokens from position 0, in chunks of at most n_batch, calling recalculate after each chunk and once more on exit.
+    size_t i = 0;  // index of the first token not yet re-evaluated
+    promptCtx.n_past = 0;  // restart the evaluated-token count from zero
+    while (i < promptCtx.tokens.size()) {
+        size_t batch_end = std::min(i + promptCtx.n_batch, promptCtx.tokens.size());  // clamp the final chunk to the end of the token list
+        std::vector<int32_t> batch(promptCtx.tokens.begin() + i, promptCtx.tokens.begin() + batch_end);  // copy of tokens [i, batch_end)
+        assert(promptCtx.n_past + int32_t(batch.size()) <= promptCtx.n_ctx);  // the chunk must still fit inside n_ctx
+        if (!evalTokens(promptCtx, batch)) {  // evalTokens is defined outside this hunk; a false result is treated as failure
+            std::cerr << "LLModel ERROR: Failed to process prompt\n";
+            goto stop_generating;  // abort, but still issue the final recalculate(false) below
+        }
+        promptCtx.n_past += batch.size();
+        if (!recalculate(true))  // callback invoked after each chunk; a false return stops the loop early (NOTE(review): presumably a caller-requested cancel, confirm)
+            goto stop_generating;
+        i = batch_end;  // NOTE(review): if n_batch were 0, i would never advance; presumably callers guarantee n_batch > 0, confirm
+    }
+    assert(promptCtx.n_past == int32_t(promptCtx.tokens.size()));  // only reached when the loop ran to completion: every token was counted
+
+stop_generating:
+    recalculate(false);  // reached on every exit path (normal completion, eval failure, callback returning false)
+}
+
 LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant) {
    //TODO: Auto-detect CUDA/OpenCL
    if (buildVariant == "auto") {