llamamodel: fix BERT tokenization after llama.cpp update (#2381)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel
2024-05-28 13:11:57 -04:00
committed by GitHub
parent 0b63ad5eff
commit f1b4092ca6
3 changed files with 6 additions and 6 deletions

View File

@@ -920,11 +920,11 @@ void LLamaModel::embedInternal(
         int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), wantBOS, false);
         if (n_tokens) {
             (void)eos_token;
-            assert(useEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
-            tokens.resize(n_tokens - useEOS); // erase EOS/SEP
-        } else {
-            tokens.clear();
+            assert((useEOS && wantBOS) == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
+            if (useEOS && wantBOS)
+                n_tokens--; // erase EOS/SEP
         }
+        tokens.resize(n_tokens);
     };
// tokenize the texts