Mirror of https://github.com/nomic-ai/gpt4all.git, synced 2025-06-26 15:31:55 +00:00
Fix regenerate button to be deterministic and bump the llama version to latest we have for gguf.
This commit is contained in:
parent 40c78d2f78
commit 12f943e966
@@ -1 +1 @@
-Subproject commit 37a0be313d21f8b61184a3adcaac123353128238
+Subproject commit 70a6537c4aae9951ba7fff740135ca7dbe14d0f1
@@ -249,15 +249,6 @@ LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
 bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
 {
-    // When we recalculate context we could have erased the original BOS token... we need to replace it
-    const bool useBOS = ctx.n_past == 0 && (ctx.tokens.empty() || ctx.tokens.front() != llama_token_bos(d_ptr->ctx));
-    if (useBOS) {
-        std::vector<int32_t> myTokens;
-        myTokens.push_back(llama_token_bos(d_ptr->ctx));
-        myTokens.insert(myTokens.end(), tokens.begin(), tokens.end());
-        ctx.n_past += 1;
-        return llama_eval(d_ptr->ctx, myTokens.data(), myTokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
-    } else
-        return llama_eval(d_ptr->ctx, tokens.data(), tokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
+    return llama_eval(d_ptr->ctx, tokens.data(), tokens.size(), ctx.n_past, d_ptr->n_threads) == 0;
 }
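The removed branch let evalTokens() silently prepend a BOS token and bump ctx.n_past on its own, so the caller's token accounting could drift by one. A minimal sketch of the alternative this removal implies, with hypothetical names (withBos is not from this commit): prepend BOS once when the prompt tokens are built, and leave all n_past bookkeeping to the caller.

// Hypothetical helper, illustrative only: ensure a BOS token at the front of
// the prompt tokens before they reach evalTokens(), instead of injecting it
// (and bumping n_past) inside evalTokens().
#include <cstdint>
#include <vector>

std::vector<int32_t> withBos(const std::vector<int32_t> &toks, int32_t bosId)
{
    std::vector<int32_t> out;
    out.reserve(toks.size() + 1);
    if (toks.empty() || toks.front() != bosId)
        out.push_back(bosId);                        // prepend BOS exactly once
    out.insert(out.end(), toks.begin(), toks.end());
    return out;
}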
@@ -92,10 +92,10 @@ void LLModel::prompt(const std::string &prompt,
             if (int32_t(promptCtx.tokens.size()) == promptCtx.n_ctx)
                 promptCtx.tokens.erase(promptCtx.tokens.begin());
             promptCtx.tokens.push_back(batch.at(t));
+            promptCtx.n_past += 1;
             if (!promptCallback(batch.at(t)))
                 return;
         }
-        promptCtx.n_past += batch.size();
         i = batch_end;
     }
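With this hunk, n_past advances once per token inside the callback loop instead of in a single bulk increment after it, so if promptCallback() stops the batch early, n_past still matches the tokens actually recorded in promptCtx.tokens. A minimal sketch under that assumption (CtxSketch and feedBatch are illustrative names, not the project's API):

#include <cstdint>
#include <functional>
#include <vector>

struct CtxSketch {
    int32_t n_past = 0;
    std::vector<int32_t> tokens;
};

void feedBatch(CtxSketch &ctx, const std::vector<int32_t> &batch,
               const std::function<bool(int32_t)> &onToken)
{
    for (int32_t t : batch) {
        ctx.tokens.push_back(t);
        ctx.n_past += 1;        // advance in lockstep with the recorded token
        if (!onToken(t))
            return;             // early exit: n_past still equals tokens.size()
    }
}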
@@ -126,8 +126,6 @@ void LLModel::prompt(const std::string &prompt,
             return;
         }
 
-        promptCtx.n_past += 1;
-
         // display text
         for (const auto token : endTokens()) {
             if (id == token) return;
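The increment removed here used to run right after evalTokens(), before the end-token check below and before the token was appended to promptCtx.tokens; the next hunk adds it back after those steps. An illustrative sketch of the old ordering only (assumed simplification, not the project's code):

#include <cstdint>
#include <vector>

struct LoopCtxSketch {
    int32_t n_past = 0;
    std::vector<int32_t> tokens;
};

// Old ordering: count first, check later. Hitting an end token returns with
// n_past one ahead of the tokens actually kept, which a later rewind by
// "number of recorded tokens" cannot undo exactly.
bool stepOldOrdering(LoopCtxSketch &ctx, int32_t id, int32_t endToken)
{
    ctx.n_past += 1;            // counted even if we bail out below
    if (id == endToken)
        return false;           // stop: nothing was recorded for this count
    ctx.tokens.push_back(id);
    return true;
}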
@@ -162,6 +160,7 @@ void LLModel::prompt(const std::string &prompt,
         if (int32_t(promptCtx.tokens.size()) == promptCtx.n_ctx)
             promptCtx.tokens.erase(promptCtx.tokens.begin());
         promptCtx.tokens.push_back(t);
+        promptCtx.n_past += 1;
         //TODO: Conversion to std::string can be avoided here...
         if (!responseCallback(t, std::string(tokenToString(t))))
             return;
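Together with the removal above, this places the increment after the end-token check and after the push into promptCtx.tokens, so n_past only counts tokens that were actually kept. A minimal sketch of the new ordering (illustrative names, not the project's API):

#include <cstdint>
#include <vector>

struct GenCtxSketch {
    int32_t n_past = 0;
    std::vector<int32_t> tokens;
};

// New ordering: keep the token first, then count it.
bool stepNewOrdering(GenCtxSketch &ctx, int32_t id, int32_t endToken)
{
    if (id == endToken)
        return false;           // stop: nothing recorded, n_past untouched
    ctx.tokens.push_back(id);
    ctx.n_past += 1;            // bookkeeping matches the recorded token count
    return true;
}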
@@ -371,7 +371,7 @@ void ChatLLM::regenerateResponse()
     else
         m_ctx.n_past -= m_promptResponseTokens;
     m_ctx.n_past = std::max(0, m_ctx.n_past);
-    m_ctx.tokens.erase(m_ctx.tokens.end() -= m_promptResponseTokens, m_ctx.tokens.end());
+    m_ctx.tokens.erase(m_ctx.tokens.end() - m_promptResponseTokens, m_ctx.tokens.end());
     m_promptResponseTokens = 0;
     m_promptTokens = 0;
     m_response = std::string();
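The fix replaces "m_ctx.tokens.end() -= m_promptResponseTokens" with plain subtraction: applying -= to the temporary iterator returned by end() happens to yield the same position with class-type iterators, but it is a misleading way to write it and would not even compile if the iterator were a raw pointer. A minimal sketch of the rewind that regenerateResponse() performs, under the assumption that n_past and the recorded tokens are kept in lockstep by the changes above (rewind is an illustrative name, not the project's code):

#include <algorithm>
#include <cstdint>
#include <vector>

void rewind(std::vector<int32_t> &tokens, int32_t &nPast, int32_t n)
{
    n = std::min<int32_t>(n, int32_t(tokens.size()));
    tokens.erase(tokens.end() - n, tokens.end());   // plain '-', not '-=' on a temporary
    nPast = std::max(0, nPast - n);                 // clamp at zero, as the code above does
}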