Mirror of https://github.com/nomic-ai/gpt4all.git
chat: faster KV shift, continue generating, fix stop sequences (#2781)
* Don't stop generating at end of context
* Use llama_kv_cache ops to shift context
* Fix and improve reverse prompt detection
* Replace prompt recalc callback with a flag to disallow context shift
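On the bindings side, this amounts to dropping the prompt-recalculation callback and passing a plain boolean in its place: instead of asking the caller to re-evaluate the whole prompt when the context window fills up, the backend now shifts the KV cache itself (via the `llama_kv_cache` ops named in the commit message), and the flag only controls whether that shift is allowed. Below is a conceptual sketch of what a context shift does, in plain Python; this illustrates the idea, not the actual C++ implementation in the backend.

```python
def shift_context(past_tokens: list[int], n_ctx: int, n_keep: int = 0) -> list[int]:
    """Free space in a full context window by discarding roughly half of the
    tokens after the first n_keep, keeping the most recent ones."""
    if len(past_tokens) < n_ctx:
        return past_tokens  # still room; nothing to do
    n_discard = (len(past_tokens) - n_keep) // 2
    # The real backend does the analogous remove-and-renumber on the KV cache,
    # so the kept tokens never need to be re-evaluated.
    return past_tokens[:n_keep] + past_tokens[n_keep + n_discard:]

print(shift_context(list(range(8)), n_ctx=8, n_keep=2))  # -> [0, 1, 5, 6, 7]
```

The diff below is the Python-binding half of that change.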
```diff
@@ -128,7 +128,6 @@ llmodel.llmodel_isModelLoaded.restype = ctypes.c_bool
 
 PromptCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_int32)
 ResponseCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_int32, ctypes.c_char_p)
-RecalculateCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_bool)
 EmbCancelCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.POINTER(ctypes.c_uint), ctypes.c_uint, ctypes.c_char_p)
 
 llmodel.llmodel_prompt.argtypes = [
```
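For context, these `CFUNCTYPE` types are how the bindings hand Python functions to the C library. A minimal sketch of wrapping a response callback (the function and variable names here are illustrative, not part of the binding):

```python
import ctypes

ResponseCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_int32, ctypes.c_char_p)

def on_token(token_id: int, response: bytes) -> bool:
    # The C side delivers the new token's text as raw bytes.
    print(token_id, response.decode(errors="replace"))
    return True  # returning False asks the backend to stop generating

# Keep a reference to the wrapper for as long as the C side may call it,
# or it can be garbage-collected out from under the library.
c_on_token = ResponseCallback(on_token)
```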
```diff
@@ -137,7 +136,7 @@ llmodel.llmodel_prompt.argtypes = [
     ctypes.c_char_p,
     PromptCallback,
     ResponseCallback,
-    RecalculateCallback,
+    ctypes.c_bool,
     ctypes.POINTER(LLModelPromptContext),
     ctypes.c_bool,
     ctypes.c_char_p,
```
```diff
@@ -513,7 +512,7 @@ class LLModel:
             ctypes.c_char_p(prompt_template.encode()),
             PromptCallback(self._prompt_callback),
             ResponseCallback(self._callback_decoder(callback)),
-            RecalculateCallback(self._recalculate_callback),
+            True,
             self.context,
             special,
             ctypes.c_char_p(),
```
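The bare `True` above lands in the argument slot now typed `ctypes.c_bool`; per the commit message this is the flag that replaces the recalc callback and permits the context shift. Passing a plain Python bool is valid because ctypes converts it automatically wherever `c_bool` is declared, which a small self-contained check confirms:

```python
import ctypes

# Hypothetical callback type, just to exercise the c_bool conversion.
BoolArgFn = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_bool)

def echo(flag: bool) -> int:
    return 1 if flag else 0

fn = BoolArgFn(echo)
assert fn(True) == 1 and fn(False) == 0  # bare Python bools convert cleanly
```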
```diff
@@ -606,8 +605,3 @@ class LLModel:
     @staticmethod
     def _prompt_callback(token_id: int) -> bool:
         return True
-
-    # Empty recalculate callback
-    @staticmethod
-    def _recalculate_callback(is_recalculating: bool) -> bool:
-        return is_recalculating
```
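With the empty recalculate callback gone, the remaining per-token hooks are the prompt and response callbacks, and returning False from one of them is how a caller stops generation early. A hedged sketch of using that to cap output length (the wrapper name and signature are illustrative, not part of the binding):

```python
def make_capped_callback(callback, max_tokens: int):
    """Wrap a response callback so generation stops after max_tokens tokens."""
    seen = 0
    def wrapped(token_id: int, response: str) -> bool:
        nonlocal seen
        seen += 1
        # Stop once the cap is hit, or as soon as the inner callback declines.
        return seen <= max_tokens and callback(token_id, response)
    return wrapped

# Usage: pass the wrapped callback wherever a response callback is expected.
capped = make_capped_callback(lambda tid, text: True, max_tokens=64)
```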