Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-08-07 19:13:28 +00:00)
Explicitly clear the kv cache each time we eval tokens to match n_past. (#1808)
parent 2d566710e5
commit 96cee4f9ac
@@ -298,6 +298,8 @@ LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const

 bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
 {
+    llama_kv_cache_seq_rm(d_ptr->ctx, 0, ctx.n_past, -1);
+
     llama_batch batch = llama_batch_init(tokens.size(), 0, 1);

     batch.n_tokens = tokens.size();
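The added llama_kv_cache_seq_rm call removes any cached KV cells for sequence 0 from position ctx.n_past to the end (p1 = -1), so the cache can never hold stale entries past the point the new tokens are decoded from. Below is a minimal, self-contained sketch of how that call fits into a token-eval path. It is not gpt4all's implementation: the hunk above only shows the start of evalTokens, so the batch-filling loop, the plain int n_past parameter (standing in for PromptContext), and the evalTokensSketch name are assumptions for illustration, and the calls assume the llama.cpp C API of this era (llama_batch_init / llama_decode / llama_batch_free).

#include <vector>
#include "llama.h"

// Sketch only: a plain n_past stands in for gpt4all's PromptContext.
static bool evalTokensSketch(llama_context *lctx, int n_past,
                             const std::vector<int32_t> &tokens)
{
    // Explicitly drop any KV cells for sequence 0 at positions >= n_past,
    // so the cache contents always match n_past before decoding new tokens.
    llama_kv_cache_seq_rm(lctx, 0, n_past, -1);

    llama_batch batch = llama_batch_init((int32_t) tokens.size(), 0, 1);
    batch.n_tokens = (int32_t) tokens.size();

    for (int32_t i = 0; i < batch.n_tokens; ++i) {
        batch.token[i]     = tokens[i];
        batch.pos[i]       = n_past + i;   // positions continue from n_past
        batch.n_seq_id[i]  = 1;
        batch.seq_id[i][0] = 0;            // same sequence the cache was trimmed for
        batch.logits[i]    = (i == batch.n_tokens - 1); // logits only for the last token
    }

    bool ok = llama_decode(lctx, batch) == 0;
    llama_batch_free(batch);
    return ok;
}

Without the explicit removal, cells left over from a longer earlier evaluation (for example after the caller rewinds n_past) could remain in the cache at positions the model is about to reuse; trimming to n_past first keeps the cache and the position counter consistent.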