Use the token cache to infer greater n_past and reuse results (#3073)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel
2024-10-31 11:19:12 -04:00
committed by GitHub
parent 62cab695eb
commit f07e2e63df
15 changed files with 320 additions and 169 deletions

View File

@@ -113,10 +113,7 @@ def _old_loop(gpt4all_instance):
full_response = gpt4all_instance.chat_completion(
MESSAGES,
# preferential kwargs for chat ux
logits_size=0,
tokens_size=0,
n_past=0,
n_ctx=0,
n_predict=200,
top_k=40,
top_p=0.9,