backend: fix extra spaces in tokenization and a CUDA crash (#2778)

Also potentially improves accuracy of BOS insertion, token cache, and logit indexing.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel
2024-08-01 10:46:36 -04:00
committed by GitHub
parent da59c9f5ea
commit 51bd01ae05
10 changed files with 46 additions and 36 deletions

View File

@@ -73,8 +73,6 @@ llmodel = load_llmodel_library()
class LLModelPromptContext(ctypes.Structure):
_fields_ = [
("logits", ctypes.POINTER(ctypes.c_float)),
("logits_size", ctypes.c_size_t),
("tokens", ctypes.POINTER(ctypes.c_int32)),
("tokens_size", ctypes.c_size_t),
("n_past", ctypes.c_int32),
@@ -351,7 +349,6 @@ class LLModel:
):
if self.context is None:
context = LLModelPromptContext(
logits_size=0,
tokens_size=0,
n_past=0,
n_ctx=0,