backend: fix extra spaces in tokenization and a CUDA crash (#2778)

Also potentially improves accuracy of BOS insertion, token cache, and logit indexing.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-09-11 05:19:31 +00:00 · 2024-08-01 10:46:36 -04:00
parent da59c9f5ea
commit 51bd01ae05
10 changed files with 46 additions and 36 deletions
--- a/gpt4all-backend/llamamodel_impl.h
+++ b/gpt4all-backend/llamamodel_impl.h
@@ -53,7 +53,7 @@ private:
    bool m_supportsCompletion = false;

 protected:
-    std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) const override;
+    std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override;
    std::string tokenToString(Token id) const override;
    Token sampleToken(PromptContext &ctx) const override;
    bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;