backend: fix buffer overrun in repeat penalty code

Caught with AddressSanitizer while running a basic prompt test against llmodel standalone. This fix allows ASan builds to complete a simple prompt without illegal accesses, but notably several leaks still remain.
2025-09-06 11:00:48 +00:00 · 2023-05-16 23:20:08 -07:00
parent 26cb31c4e6
commit e6fd0a240d
3 changed files with 9 additions and 6 deletions
--- a/gpt4all-backend/gptj.cpp
+++ b/gpt4all-backend/gptj.cpp
@@ -993,9 +993,10 @@ void GPTJ::prompt(const std::string &prompt,
        gpt_vocab::id id = 0;
        {
            const int64_t t_start_sample_us = ggml_time_us();
+            const size_t n_prev_toks = std::min((size_t) promptCtx.repeat_last_n, promptCtx.tokens.size());
            id = gpt_sample_top_k_top_p(d_ptr->vocab, n_vocab,
-                promptCtx.tokens.data() + promptCtx.n_ctx - promptCtx.n_ctx,
-                promptCtx.n_ctx,
+                promptCtx.tokens.data() + promptCtx.tokens.size() - n_prev_toks,
+                n_prev_toks,
                promptCtx.logits,
                promptCtx.top_k, promptCtx.top_p, promptCtx.temp,
                promptCtx.repeat_penalty,