Fix regenerate button to be deterministic and bump the llama version to the latest we have for gguf.

This commit is contained in:
Adam Treat
2023-10-03 12:42:31 -04:00
parent 40c78d2f78
commit 12f943e966
4 changed files with 5 additions and 15 deletions

View File

@@ -249,16 +249,7 @@ LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
// Evaluates `tokens` by calling llama_eval on d_ptr->ctx with ctx.n_past and d_ptr->n_threads.
// Returns true when llama_eval returns 0, false otherwise.
// NOTE(review): this text looks like a rendered diff whose +/- markers were lost. The BOS
// branch and the final unconditional return are presumably the "before" and "after" versions
// of the body; confirm against the repository before treating this as a single function.
bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
{
// When we recalculate context we could have erased the original BOS token... we need to replace it
// useBOS is true only when ctx.n_past is 0 and ctx.tokens is empty or does not start with the BOS token.
const bool useBOS = ctx.n_past == 0 && (ctx.tokens.empty() || ctx.tokens.front() != llama_token_bos(d_ptr->ctx));
if (useBOS) {
// Build a copy of the incoming batch with the BOS token prepended.
std::vector<int32_t> myTokens;
myTokens.push_back(llama_token_bos(d_ptr->ctx));
myTokens.insert(myTokens.end(), tokens.begin(), tokens.end());
// n_past is incremented before the call, so llama_eval receives the incremented value.
ctx.n_past += 1;
return llama_eval(d_ptr->ctx, myTokens.data(), myTokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
} else
return llama_eval(d_ptr->ctx, tokens.data(), tokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
// As written, both branches above return, so this statement is never reached.
return llama_eval(d_ptr->ctx, tokens.data(), tokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
}
int32_t LLamaModel::contextLength() const

View File

@@ -92,10 +92,10 @@ void LLModel::prompt(const std::string &prompt,
if (int32_t(promptCtx.tokens.size()) == promptCtx.n_ctx)
promptCtx.tokens.erase(promptCtx.tokens.begin());
promptCtx.tokens.push_back(batch.at(t));
promptCtx.n_past += 1;
if (!promptCallback(batch.at(t)))
return;
}
promptCtx.n_past += batch.size();
i = batch_end;
}
@@ -126,8 +126,6 @@ void LLModel::prompt(const std::string &prompt,
return;
}
promptCtx.n_past += 1;
// display text
for (const auto token : endTokens()) {
if (id == token) return;
@@ -162,6 +160,7 @@ void LLModel::prompt(const std::string &prompt,
if (int32_t(promptCtx.tokens.size()) == promptCtx.n_ctx)
promptCtx.tokens.erase(promptCtx.tokens.begin());
promptCtx.tokens.push_back(t);
promptCtx.n_past += 1;
//TODO: Conversion to std::string can be avoided here...
if (!responseCallback(t, std::string(tokenToString(t))))
return;