Fix regenerate button to be deterministic and bump the llama version to the latest we have for gguf.

This commit is contained in:
Adam Treat
2023-10-03 12:42:31 -04:00
parent 40c78d2f78
commit 12f943e966
4 changed files with 5 additions and 15 deletions


@@ -92,10 +92,10 @@ void LLModel::prompt(const std::string &prompt,
             if (int32_t(promptCtx.tokens.size()) == promptCtx.n_ctx)
                 promptCtx.tokens.erase(promptCtx.tokens.begin());
             promptCtx.tokens.push_back(batch.at(t));
+            promptCtx.n_past += 1;
             if (!promptCallback(batch.at(t)))
                 return;
         }
-        promptCtx.n_past += batch.size();
         i = batch_end;
     }
@@ -126,8 +126,6 @@ void LLModel::prompt(const std::string &prompt,
             return;
         }
 
-        promptCtx.n_past += 1;
-
         // display text
         for (const auto token : endTokens()) {
             if (id == token) return;
@@ -162,6 +160,7 @@ void LLModel::prompt(const std::string &prompt,
             if (int32_t(promptCtx.tokens.size()) == promptCtx.n_ctx)
                 promptCtx.tokens.erase(promptCtx.tokens.begin());
             promptCtx.tokens.push_back(t);
+            promptCtx.n_past += 1;
             //TODO: Conversion to std::string can be avoided here...
             if (!responseCallback(t, std::string(tokenToString(t))))
                 return;
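
The change moves the n_past bookkeeping so it is incremented in the same step that pushes each token into promptCtx.tokens, instead of being bumped once per batch (or after callbacks that may return early). That keeps n_past in lockstep with the token buffer, which is what makes rewinding the context for the regenerate button deterministic. Below is a minimal sketch of that pattern, not the real LLModel API: the PromptContext struct is simplified to the fields the diff touches, and appendToken is an illustrative helper name.

    // Sketch only: simplified PromptContext, hypothetical appendToken helper.
    #include <cstdint>
    #include <vector>

    using Token = int32_t;

    struct PromptContext {
        std::vector<Token> tokens; // sliding window of tokens the model has seen
        int32_t n_ctx = 2048;      // maximum window size
        int32_t n_past = 0;        // tokens already evaluated by the model
    };

    // Append one token and advance n_past in the same step, so the two counters
    // can never drift apart (the drift is what made regeneration non-deterministic).
    inline void appendToken(PromptContext &ctx, Token t) {
        if (int32_t(ctx.tokens.size()) == ctx.n_ctx)
            ctx.tokens.erase(ctx.tokens.begin()); // evict oldest token at capacity
        ctx.tokens.push_back(t);
        ctx.n_past += 1;
    }

With the per-batch increment, an early return from promptCallback left n_past ahead of (or behind) the tokens actually recorded, so the next regeneration started from an inconsistent state; incrementing per token removes that window.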