Fix the regenerate button to be deterministic and bump the llama version to the latest we have for GGUF.

commit 12f943e966
parent 40c78d2f78
Author: Adam Treat
Date: 2023-10-03 12:42:31 -04:00

4 changed files with 5 additions and 15 deletions

@@ -249,16 +249,7 @@ LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
 bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
 {
-    // When we recalculate context we could have erased the original BOS token... we need to replace it
-    const bool useBOS = ctx.n_past == 0 && (ctx.tokens.empty() || ctx.tokens.front() != llama_token_bos(d_ptr->ctx));
-    if (useBOS) {
-        std::vector<int32_t> myTokens;
-        myTokens.push_back(llama_token_bos(d_ptr->ctx));
-        myTokens.insert(myTokens.end(), tokens.begin(), tokens.end());
-        ctx.n_past += 1;
-        return llama_eval(d_ptr->ctx, myTokens.data(), myTokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
-    } else
-        return llama_eval(d_ptr->ctx, tokens.data(), tokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
+    return llama_eval(d_ptr->ctx, tokens.data(), tokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
 }

 int32_t LLamaModel::contextLength() const
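
The removed branch mutated ctx.n_past and spliced a BOS token into the stream inside the evaluation call itself, so a regenerate that recalculated the context could end up evaluating a different token sequence than the first pass. With that special case gone, evalTokens is a pure forward call, and BOS handling can happen exactly once, at tokenization time. A minimal sketch of that approach, assuming the same old-style llama.cpp C API that the diff's llama_eval call uses (tokenizeWithBOS is a hypothetical helper for illustration, not code from this repository):

#include <string>
#include <vector>
#include "llama.h"

// Hypothetical sketch: prepend BOS once, during tokenization, so the
// evaluation path never adjusts n_past behind the caller's back and
// regeneration replays an identical token sequence every time.
std::vector<llama_token> tokenizeWithBOS(llama_context *ctx, const std::string &text)
{
    // Worst case is one token per byte, plus room for the leading BOS.
    std::vector<llama_token> tokens(text.size() + 1);
    // add_bos = true asks llama.cpp itself to prepend the BOS token
    // (old C API, matching the llama_eval signature in the diff above).
    int n = llama_tokenize(ctx, text.c_str(), tokens.data(), tokens.size(), /*add_bos=*/true);
    tokens.resize(n < 0 ? 0 : n);
    return tokens;
}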