chat: faster KV shift, continue generating, fix stop sequences (#2781)

* Don't stop generating at end of context
* Use llama_kv_cache ops to shift context (see the sketch after this list)
* Fix and improve reverse prompt detection (sketched after the commit metadata below)
* Replace prompt recalc callback with a flag to disallow context shift (sketched at the end of this section)
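
The first two bullets are two halves of the same change: when the context window fills up, the chat no longer stops (or clears the KV cache and re-decodes the truncated prompt from scratch); it shifts the existing cache in place and keeps decoding. A minimal sketch of that shift, assuming llama.cpp's C API of this period; aside from the llama_kv_cache_* calls, the names (shift_context, keep, n_past) are illustrative, not gpt4all's:

#include "llama.h"

// Make room at the end of a full context window without re-decoding the
// surviving tokens. 'keep' pins a prefix (e.g. the system prompt) that
// must never be evicted; 'n_past' is the number of cells currently used.
static void shift_context(llama_context *ctx, int n_past, int keep) {
    // Discard roughly half of the evictable tokens.
    const int discard = (n_past - keep) / 2;

    // Drop cells [keep, keep + discard) from sequence 0 ...
    llama_kv_cache_seq_rm (ctx, /*seq_id*/ 0, keep, keep + discard);
    // ... then slide the remaining cells left by 'discard' positions.
    // Adjusting stored positions is far cheaper than re-evaluating them.
    llama_kv_cache_seq_add(ctx, /*seq_id*/ 0, keep + discard, n_past, -discard);
}

After the shift, decoding resumes at position n_past - discard, which is what lets generation continue past the window edge; only the discarded tokens' attention history is lost.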
Author:    Jared Van Bortel
Date:      2024-08-07 11:25:24 -04:00
Committed: GitHub
Parent:    90de2d32f8
Commit:    be66ec8ab5

16 changed files with 285 additions and 230 deletions
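
The stop-sequence fix concerns reverse prompts that arrive split across token boundaries: a naive per-token substring check can either miss the match entirely or stream half of the stop string to the UI before noticing. A hedged sketch of the buffering idea this implies (self-contained, hypothetical names, not the actual gpt4all code):

#include <algorithm>
#include <string>
#include <string_view>
#include <vector>

struct StopScanResult {
    std::string emit; // text now safe to show the user
    bool        stop; // a full stop sequence was matched
};

// Feed each decoded piece through 'pending'. Text is only released once
// it can no longer grow into a stop sequence, so a match that straddles
// several tokens is swallowed instead of leaking to the UI.
StopScanResult scanForStop(std::string &pending, std::string_view piece,
                           const std::vector<std::string> &stops) {
    pending += piece;
    for (const auto &s : stops) {
        size_t pos = pending.find(s);
        if (pos != std::string::npos) {
            std::string out = pending.substr(0, pos); // emit up to the match
            pending.clear();                          // swallow the stop text
            return {out, true};
        }
    }
    // Hold back the longest suffix of 'pending' that is a proper prefix of
    // some stop sequence; everything before it is safe to emit now.
    size_t hold = 0;
    for (const auto &s : stops)
        for (size_t n = std::min(s.size() - 1, pending.size()); n > hold; n--)
            if (pending.compare(pending.size() - n, n, s, 0, n) == 0) {
                hold = n;
                break;
            }
    std::string out = pending.substr(0, pending.size() - hold);
    pending.erase(0, pending.size() - hold);
    return {out, false};
}

Held-back text stays in 'pending' and is flushed on a later call once it can no longer complete a match, so the UI never sees a partial stop sequence.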

@@ -834,7 +834,7 @@ Rectangle {
             to: 360
             duration: 1000
             loops: Animation.Infinite
-            running: currentResponse && (currentChat.responseInProgress || currentChat.isRecalc)
+            running: currentResponse && (currentChat.responseInProgress || currentChat.restoringFromText)
         }
     }
 }
@@ -867,13 +867,13 @@ Rectangle {
                 color: theme.mutedTextColor
             }
             RowLayout {
-                visible: currentResponse && ((value === "" && currentChat.responseInProgress) || currentChat.isRecalc)
+                visible: currentResponse && ((value === "" && currentChat.responseInProgress) || currentChat.restoringFromText)
                 Text {
                     color: theme.mutedTextColor
                     font.pixelSize: theme.fontSizeLarger
                     text: {
-                        if (currentChat.isRecalc)
-                            return qsTr("recalculating context ...");
+                        if (currentChat.restoringFromText)
+                            return qsTr("restoring from text ...");
                         switch (currentChat.responseState) {
                         case Chat.ResponseStopped: return qsTr("response stopped ...");
                         case Chat.LocalDocsRetrieval: return qsTr("retrieving localdocs: %1 ...").arg(currentChat.collectionList.join(", "));
@@ -1861,7 +1861,7 @@ Rectangle {
         }
     }
     function sendMessage() {
-        if (textInput.text === "" || currentChat.responseInProgress || currentChat.isRecalc)
+        if (textInput.text === "" || currentChat.responseInProgress || currentChat.restoringFromText)
             return
         currentChat.stopGenerating()
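
The last bullet drops the old prompt-recalc callback, which existed so the UI could be told a full recalculation was underway (the isRecalc state removed in the hunks above); with the shift path there is nothing to recalculate, so callers that cannot tolerate a mutated KV cache instead pass a flag, and generation simply ends at the window edge. A rough sketch of that gating, reusing shift_context from the first sketch; allowContextShift and generateOneToken are assumptions, not the real gpt4all API:

// Builds on shift_context() from the KV-shift sketch above (needs llama.h).
// Returns false when generation must end because the window is full and
// shifting has been disallowed by the caller.
bool generateOneToken(llama_context *ctx, int &n_past, int n_ctx,
                      int keep, bool allowContextShift) {
    if (n_past >= n_ctx) {
        if (!allowContextShift)
            return false;                 // no room and no shifting: stop cleanly
        shift_context(ctx, n_past, keep); // evict + slide, as sketched earlier
        n_past -= (n_past - keep) / 2;    // account for the discarded tokens
    }
    // ... sample and decode the next token at position n_past++ ...
    return true;
}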