mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-09-10 12:59:09 +00:00
backend: fix buffer overrun in repeat penalty code
Caught with AddressSanitizer while running a basic prompt test against llmodel standalone. This fix allows ASan builds to complete a simple prompt without illegal accesses, although notably several leaks still remain.
This commit is contained in:
@@ -180,9 +180,10 @@ void LLamaModel::prompt(const std::string &prompt,
|
||||
int32_t totalPredictions = 0;
|
||||
for (int i = 0; i < promptCtx.n_predict; i++) {
|
||||
// sample next token
|
||||
const size_t n_prev_toks = std::min((size_t) promptCtx.repeat_last_n, promptCtx.tokens.size());
|
||||
llama_token id = llama_sample_top_p_top_k(d_ptr->ctx,
|
||||
promptCtx.tokens.data() + promptCtx.n_ctx - promptCtx.repeat_last_n,
|
||||
promptCtx.repeat_last_n, promptCtx.top_k, promptCtx.top_p, promptCtx.temp,
|
||||
promptCtx.tokens.data() + promptCtx.tokens.size() - n_prev_toks,
|
||||
n_prev_toks, promptCtx.top_k, promptCtx.top_p, promptCtx.temp,
|
||||
promptCtx.repeat_penalty);
|
||||
|
||||
// Check if the context has run out...
|
||||
|
Reference in New Issue
Block a user