Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-09-01 16:47:18 +00:00)
Use the token cache to infer greater n_past and reuse results (#3073)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
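The idea behind this commit: the tokens from the previous evaluation are kept in a cache, and when a new prompt begins with the same tokens, that shared prefix is counted as already-processed context (a larger n_past) instead of being prefilled again. Below is a minimal sketch of that prefix-matching idea, with hypothetical names (TokenCache, infer_n_past) that are not part of the gpt4all API:

```python
# Hypothetical sketch, not the gpt4all implementation: keep the tokens from
# the previous decode and, when a new prompt shares a prefix with them, start
# evaluation at the end of that shared prefix instead of at token 0.

from typing import List


class TokenCache:
    def __init__(self) -> None:
        self.tokens: List[int] = []  # tokens already evaluated by the model

    def infer_n_past(self, prompt_tokens: List[int]) -> int:
        """Return how many leading tokens of prompt_tokens are already cached,
        i.e. the n_past value the model can resume from."""
        n = 0
        for cached, new in zip(self.tokens, prompt_tokens):
            if cached != new:
                break
            n += 1
        return n

    def update(self, evaluated_tokens: List[int]) -> None:
        # Remember everything the model has now seen for the next call.
        self.tokens = list(evaluated_tokens)


# Usage: only the suffix after the shared prefix needs a fresh prefill pass.
cache = TokenCache()
cache.update([1, 2, 3, 4])              # tokens from the previous prompt
new_prompt = [1, 2, 3, 9, 10]
n_past = cache.infer_n_past(new_prompt)  # -> 3
suffix = new_prompt[n_past:]             # [9, 10] still needs evaluation
```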
@@ -113,10 +113,7 @@ def _old_loop(gpt4all_instance):
        full_response = gpt4all_instance.chat_completion(
            MESSAGES,
            # preferential kwargs for chat ux
            logits_size=0,
            tokens_size=0,
            n_past=0,
            n_ctx=0,
            n_predict=200,
            top_k=40,
            top_p=0.9,
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

### Added
- Warn on Windows if the Microsoft Visual C++ runtime libraries are not found ([#2920](https://github.com/nomic-ai/gpt4all/pull/2920))
- Basic cache for faster prefill when the input shares a prefix with previous context ([#3073](https://github.com/nomic-ai/gpt4all/pull/3073))

### Changed
- Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
@@ -116,10 +116,7 @@ llmodel = load_llmodel_library()

class LLModelPromptContext(ctypes.Structure):
    _fields_ = [
        ("tokens", ctypes.POINTER(ctypes.c_int32)),
        ("tokens_size", ctypes.c_size_t),
        ("n_past", ctypes.c_int32),
        ("n_ctx", ctypes.c_int32),
        ("n_predict", ctypes.c_int32),
        ("top_k", ctypes.c_int32),
        ("top_p", ctypes.c_float),

@@ -393,9 +390,7 @@ class LLModel:
    ):
        if self.context is None:
            context = LLModelPromptContext(
                tokens_size=0,
                n_past=0,
                n_ctx=0,
                n_predict=n_predict,
                top_k=top_k,
                top_p=top_p,
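The bindings hunk above relies on the standard ctypes pattern: a Structure subclass declares its C layout in _fields_, and the object can be built with keyword arguments matching those field names, with omitted fields zero-initialized. A self-contained sketch of that pattern, using a deliberately reduced field set (the real LLModelPromptContext carries more sampling parameters than shown here):

```python
# Minimal sketch of the ctypes.Structure pattern used by the Python bindings.
# The field set is reduced for illustration and is not the full struct.

import ctypes


class PromptContext(ctypes.Structure):
    _fields_ = [
        ("n_past",    ctypes.c_int32),
        ("n_predict", ctypes.c_int32),
        ("top_k",     ctypes.c_int32),
        ("top_p",     ctypes.c_float),
    ]


# Keyword construction mirrors how the bindings build their context object;
# fields left out of the constructor (n_past here) default to zero.
ctx = PromptContext(n_predict=200, top_k=40, top_p=0.9)
print(ctx.n_past, ctx.n_predict, ctx.top_k, round(ctx.top_p, 2))  # 0 200 40 0.9
```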