backend: fix extra spaces in tokenization and a CUDA crash (#2778)

Also potentially improves accuracy of BOS insertion, token cache, and logit indexing. Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-09-08 11:58:53 +00:00 · 2024-08-01 10:46:36 -04:00
parent da59c9f5ea
commit 51bd01ae05
10 changed files with 46 additions and 36 deletions
--- a/gpt4all-backend/llmodel_c.cpp
+++ b/gpt4all-backend/llmodel_c.cpp
@@ -117,9 +117,6 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
        return response_callback(token_id, response.c_str());
    };

-    if (size_t(ctx->n_past) < wrapper->promptContext.tokens.size())
-        wrapper->promptContext.tokens.resize(ctx->n_past);
-
    // Copy the C prompt context
    wrapper->promptContext.n_past = ctx->n_past;
    wrapper->promptContext.n_ctx = ctx->n_ctx;