backend: fix extra spaces in tokenization and a CUDA crash (#2778)

Also potentially improves accuracy of BOS insertion, token cache, and logit indexing.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-09-05 10:30:29 +00:00 · 2024-08-01 10:46:36 -04:00
parent da59c9f5ea
commit 51bd01ae05
10 changed files with 46 additions and 36 deletions
--- a/gpt4all-bindings/python/gpt4all/_pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py
@@ -73,8 +73,6 @@ llmodel = load_llmodel_library()

 class LLModelPromptContext(ctypes.Structure):
    _fields_ = [
-        ("logits", ctypes.POINTER(ctypes.c_float)),
-        ("logits_size", ctypes.c_size_t),
        ("tokens", ctypes.POINTER(ctypes.c_int32)),
        ("tokens_size", ctypes.c_size_t),
        ("n_past", ctypes.c_int32),
@@ -351,7 +349,6 @@ class LLModel:
    ):
        if self.context is None:
            context = LLModelPromptContext(
-                logits_size=0,
                tokens_size=0,
                n_past=0,
                n_ctx=0,