chat: faster KV shift, continue generating, fix stop sequences (#2781)

* Don't stop generating at end of context
* Use llama_kv_cache ops to shift context (see the sketch after this list)
* Fix and improve reverse prompt detection
* Replace prompt recalc callback with a flag to disallow context shift
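
As a rough illustration of the KV-cache approach, here is a sketch assuming the llama.cpp API of this era; shiftContext, n_keep, and n_discard are illustrative names, not identifiers from this commit:

    #include "llama.h"

    // Sketch: evict the oldest n_discard tokens after the kept prefix, then
    // slide the surviving cells left so decoding can continue without
    // re-evaluating the whole prompt. This mirrors the context-shift pattern
    // used in llama.cpp's own examples.
    static void shiftContext(llama_context *ctx, int &n_past, int n_keep, int n_discard) {
        // Remove cache cells [n_keep, n_keep + n_discard) from sequence 0.
        llama_kv_cache_seq_rm (ctx, 0, n_keep, n_keep + n_discard);
        // Shift cells [n_keep + n_discard, n_past) left by n_discard positions.
        llama_kv_cache_seq_add(ctx, 0, n_keep + n_discard, n_past, -n_discard);
        n_past -= n_discard;
    }

This avoids the old approach of recalculating the prompt from scratch when the context fills up, which is why the recalc callback below becomes unnecessary.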
Author: Jared Van Bortel
Date:   2024-08-07 11:25:24 -04:00 (committed by GitHub)
Parent: 90de2d32f8
Commit: be66ec8ab5

16 changed files with 285 additions and 230 deletions


@@ -90,13 +90,13 @@ void ChatAPI::prompt(const std::string &prompt,
                      const std::string &promptTemplate,
                      std::function<bool(int32_t)> promptCallback,
                      std::function<bool(int32_t, const std::string&)> responseCallback,
-                     std::function<bool(bool)> recalculateCallback,
+                     bool allowContextShift,
                      PromptContext &promptCtx,
                      bool special,
                      std::string *fakeReply) {
     Q_UNUSED(promptCallback);
-    Q_UNUSED(recalculateCallback);
+    Q_UNUSED(allowContextShift);
     Q_UNUSED(special);
     if (!isModelLoaded()) {
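
ChatAPI is a remote backend with no local KV cache, so it simply ignores the flag. For a local backend, a hedged sketch of how the new flag might be consulted inside a generation loop (promptCtx.n_past and n_ctx are PromptContext fields visible in this codebase; the surrounding loop scaffolding is illustrative, not from this commit):

    // Hypothetical check before decoding the next token.
    if (promptCtx.n_past + 1 > promptCtx.n_ctx) {
        if (!allowContextShift)
            break;               // shifting disallowed: stop at end of context
        shiftContext(/*...*/);   // otherwise evict + slide KV cells, keep going
    }

This is the design change the commit title points at: instead of the caller supplying a recalc callback, it passes a single flag saying whether the backend may shift context to continue generating.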