chat: faster KV shift, continue generating, fix stop sequences (#2781)

* Don't stop generating at end of context
* Use llama_kv_cache ops to shift context (see the sketch after this list)
* Fix and improve reverse prompt detection (sketched after the commit metadata below)
* Replace prompt recalc callback with a flag to disallow context shift (sketched at the end of this section)
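
The first two bullets are two halves of the same change: when the context window fills up, the chat no longer stops (or clears the KV cache and re-decodes the truncated prompt from scratch); it shifts the existing cache in place and keeps decoding. A minimal sketch of that shift, assuming llama.cpp's C API of this period; aside from the llama_kv_cache_* calls, the names (shift_context, keep, n_past) are illustrative, not gpt4all's:

#include "llama.h"

// Make room at the end of a full context window without re-decoding the
// surviving tokens. 'keep' pins a prefix (e.g. the system prompt) that
// must never be evicted; 'n_past' is the number of cells currently used.
static void shift_context(llama_context *ctx, int n_past, int keep) {
    // Discard roughly half of the evictable tokens.
    const int discard = (n_past - keep) / 2;

    // Drop cells [keep, keep + discard) from sequence 0 ...
    llama_kv_cache_seq_rm (ctx, /*seq_id*/ 0, keep, keep + discard);
    // ... then slide the remaining cells left by 'discard' positions.
    // Adjusting stored positions is far cheaper than re-evaluating them.
    llama_kv_cache_seq_add(ctx, /*seq_id*/ 0, keep + discard, n_past, -discard);
}

After the shift, decoding resumes at position n_past - discard, which is what lets generation continue past the window edge; only the discarded tokens' attention history is lost.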
Author:    Jared Van Bortel
Date:      2024-08-07 11:25:24 -04:00
Committed: GitHub
Parent:    90de2d32f8
Commit:    be66ec8ab5

16 changed files with 285 additions and 230 deletions
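
The stop-sequence fix concerns reverse prompts that arrive split across token boundaries: a naive per-token substring check can either miss the match entirely or stream half of the stop string to the UI before noticing. A hedged sketch of the buffering idea this implies (self-contained, hypothetical names, not the actual gpt4all code):

#include <algorithm>
#include <string>
#include <string_view>
#include <vector>

struct StopScanResult {
    std::string emit; // text now safe to show the user
    bool        stop; // a full stop sequence was matched
};

// Feed each decoded piece through 'pending'. Text is only released once
// it can no longer grow into a stop sequence, so a match that straddles
// several tokens is swallowed instead of leaking to the UI.
StopScanResult scanForStop(std::string &pending, std::string_view piece,
                           const std::vector<std::string> &stops) {
    pending += piece;
    for (const auto &s : stops) {
        size_t pos = pending.find(s);
        if (pos != std::string::npos) {
            std::string out = pending.substr(0, pos); // emit up to the match
            pending.clear();                          // swallow the stop text
            return {out, true};
        }
    }
    // Hold back the longest suffix of 'pending' that is a proper prefix of
    // some stop sequence; everything before it is safe to emit now.
    size_t hold = 0;
    for (const auto &s : stops)
        for (size_t n = std::min(s.size() - 1, pending.size()); n > hold; n--)
            if (pending.compare(pending.size() - n, n, s, 0, n) == 0) {
                hold = n;
                break;
            }
    std::string out = pending.substr(0, pending.size() - hold);
    pending.erase(0, pending.size() - hold);
    return {out, false};
}

Held-back text stays in 'pending' and is flushed on a later call once it can no longer complete a match, so the UI never sees a partial stop sequence.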

@@ -834,7 +834,7 @@ Rectangle {
             to: 360
             duration: 1000
             loops: Animation.Infinite
-            running: currentResponse && (currentChat.responseInProgress || currentChat.isRecalc)
+            running: currentResponse && (currentChat.responseInProgress || currentChat.restoringFromText)
         }
     }
 }
@@ -867,13 +867,13 @@ Rectangle {
                 color: theme.mutedTextColor
             }
             RowLayout {
-                visible: currentResponse && ((value === "" && currentChat.responseInProgress) || currentChat.isRecalc)
+                visible: currentResponse && ((value === "" && currentChat.responseInProgress) || currentChat.restoringFromText)
                 Text {
                     color: theme.mutedTextColor
                     font.pixelSize: theme.fontSizeLarger
                     text: {
-                        if (currentChat.isRecalc)
-                            return qsTr("recalculating context ...");
+                        if (currentChat.restoringFromText)
+                            return qsTr("restoring from text ...");
                         switch (currentChat.responseState) {
                         case Chat.ResponseStopped: return qsTr("response stopped ...");
                         case Chat.LocalDocsRetrieval: return qsTr("retrieving localdocs: %1 ...").arg(currentChat.collectionList.join(", "));
@@ -1861,7 +1861,7 @@ Rectangle {
         }
     }
     function sendMessage() {
-        if (textInput.text === "" || currentChat.responseInProgress || currentChat.isRecalc)
+        if (textInput.text === "" || currentChat.responseInProgress || currentChat.restoringFromText)
             return
         currentChat.stopGenerating()
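
The last bullet drops the old prompt-recalc callback, which existed so the UI could be told a full recalculation was underway (the isRecalc state removed in the hunks above); with the shift path there is nothing to recalculate, so callers that cannot tolerate a mutated KV cache instead pass a flag, and generation simply ends at the window edge. A rough sketch of that gating, reusing shift_context from the first sketch; allowContextShift and generateOneToken are assumptions, not the real gpt4all API:

// Builds on shift_context() from the KV-shift sketch above (needs llama.h).
// Returns false when generation must end because the window is full and
// shifting has been disallowed by the caller.
bool generateOneToken(llama_context *ctx, int &n_past, int n_ctx,
                      int keep, bool allowContextShift) {
    if (n_past >= n_ctx) {
        if (!allowContextShift)
            return false;                 // no room and no shifting: stop cleanly
        shift_context(ctx, n_past, keep); // evict + slide, as sketched earlier
        n_past -= (n_past - keep) / 2;    // account for the discarded tokens
    }
    // ... sample and decode the next token at position n_past++ ...
    return true;
}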