Use the token cache to infer greater n_past and reuse results (#3073)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-09-06 19:10:15 +00:00 · 2024-10-31 11:19:12 -04:00
parent 62cab695eb
commit f07e2e63df
15 changed files with 320 additions and 169 deletions
--- a/gpt4all-bindings/cli/app.py
+++ b/gpt4all-bindings/cli/app.py
@@ -113,10 +113,7 @@ def _old_loop(gpt4all_instance):
        full_response = gpt4all_instance.chat_completion(
            MESSAGES,
            # preferential kwargs for chat ux
-            logits_size=0,
-            tokens_size=0,
            n_past=0,
-            n_ctx=0,
            n_predict=200,
            top_k=40,
            top_p=0.9,