mirror of https://github.com/nomic-ai/gpt4all.git, synced 2025-09-06 19:10:15 +00:00
chat: faster KV shift, continue generating, fix stop sequences (#2781)
* Don't stop generating at end of context
* Use llama_kv_cache ops to shift context (see the sketch below)
* Fix and improve reverse prompt detection
* Replace prompt recalc callback with a flag to disallow context shift
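The KV shift in the second bullet can be pictured as follows. This is a minimal sketch against the llama.cpp C API: llama_kv_cache_seq_rm and llama_kv_cache_seq_add are real llama.cpp calls, but the helper itself and the n_keep/n_discard policy are illustrative, not this commit's exact code.

    #include "llama.h"

    // Shift the context instead of re-evaluating the whole prompt: evict the
    // oldest tokens after a protected prefix of n_keep tokens, then slide the
    // surviving KV cells left so their positions stay contiguous.
    static void shift_context(llama_context *ctx, int n_keep, int n_past) {
        const int n_discard = (n_past - n_keep) / 2;  // drop half the shiftable window
        llama_kv_cache_seq_rm (ctx, /*seq_id*/ 0, n_keep, n_keep + n_discard);
        llama_kv_cache_seq_add(ctx, /*seq_id*/ 0, n_keep + n_discard, n_past, -n_discard);
        // Decoding can resume at position n_past - n_discard, which is what
        // makes this much faster than a full prompt recalculation.
    }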
@@ -93,7 +93,7 @@ class Chat;
class ChatLLM : public QObject
{
    Q_OBJECT
    Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)
    Q_PROPERTY(bool restoringFromText READ restoringFromText NOTIFY restoringFromTextChanged)
    Q_PROPERTY(QString deviceBackend READ deviceBackend NOTIFY loadedModelInfoChanged)
    Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged)
    Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged)
@@ -121,7 +121,7 @@ public:
    ModelInfo modelInfo() const;
    void setModelInfo(const ModelInfo &info);

    bool isRecalc() const { return m_isRecalc; }
    bool restoringFromText() const { return m_restoringFromText; }

    void acquireModel();
    void resetModel();
@@ -172,7 +172,7 @@ public Q_SLOTS:
    void processRestoreStateFromText();

Q_SIGNALS:
    void recalcChanged();
    void restoringFromTextChanged();
    void loadedModelInfoChanged();
    void modelLoadingPercentageChanged(float);
    void modelLoadingError(const QString &error);
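For orientation, signals like these are consumed with ordinary Qt connections on the UI side. A hypothetical receiver (the chatLLM pointer and the lambdas are illustrative, not code from this commit; requires <QDebug>):

    QObject::connect(chatLLM, &ChatLLM::modelLoadingPercentageChanged,
                     [](float progress) { qDebug() << "model loading:" << progress; });
    QObject::connect(chatLLM, &ChatLLM::modelLoadingError,
                     [](const QString &error) { qWarning() << "model load failed:" << error; });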
@@ -201,19 +201,14 @@ protected:
                      int32_t repeat_penalty_tokens);
    bool handlePrompt(int32_t token);
    bool handleResponse(int32_t token, const std::string &response);
    bool handleRecalculate(bool isRecalc);
    bool handleNamePrompt(int32_t token);
    bool handleNameResponse(int32_t token, const std::string &response);
    bool handleNameRecalculate(bool isRecalc);
    bool handleSystemPrompt(int32_t token);
    bool handleSystemResponse(int32_t token, const std::string &response);
    bool handleSystemRecalculate(bool isRecalc);
    bool handleRestoreStateFromTextPrompt(int32_t token);
    bool handleRestoreStateFromTextResponse(int32_t token, const std::string &response);
    bool handleRestoreStateFromTextRecalculate(bool isRecalc);
    bool handleQuestionPrompt(int32_t token);
    bool handleQuestionResponse(int32_t token, const std::string &response);
    bool handleQuestionRecalculate(bool isRecalc);
    void saveState();
    void restoreState();
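The handle*Response() callbacks are where streamed output meets the reverse prompt (stop sequence) detection the commit message says it fixes. A self-contained sketch of one way such detection can work (illustrative, not the commit's actual code): text is withheld while it could still be the prefix of a stop string, and a completed stop string ends generation without being shown.

    #include <algorithm>
    #include <string>
    #include <string_view>
    #include <vector>

    // Illustrative stop-sequence matcher for streamed text.
    struct StopMatcher {
        std::vector<std::string> stops;   // reverse prompts, e.g. "### Human:"
        std::string pending;              // text not yet safe to emit

        // Feed a decoded piece; returns text that is safe to show. Sets *hit
        // when a complete stop string was found (it is dropped from the output).
        std::string feed(std::string_view piece, bool *hit) {
            pending += piece;
            *hit = false;
            for (const auto &s : stops) {
                if (s.empty()) continue;
                size_t pos = pending.find(s);
                if (pos != std::string::npos) {   // full stop sequence present
                    std::string out = pending.substr(0, pos);
                    pending.clear();
                    *hit = true;
                    return out;
                }
            }
            // Hold back the longest tail of `pending` that is a proper prefix
            // of some stop string; it might complete on the next token.
            size_t hold = 0;
            for (const auto &s : stops)
                for (size_t n = std::min(pending.size(), s.size() - 1); n > 0; --n)
                    if (pending.compare(pending.size() - n, n, s, 0, n) == 0) {
                        hold = std::max(hold, n);
                        break;
                    }
            std::string out = pending.substr(0, pending.size() - hold);
            pending.erase(0, pending.size() - hold);
            return out;
        }
    };

On a hit, the caller would end the generation loop (e.g. by setting m_stopGenerating) without emitting the stop text itself.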
@@ -236,7 +231,7 @@ private:
    QThread m_llmThread;
    std::atomic<bool> m_stopGenerating;
    std::atomic<bool> m_shouldBeLoaded;
    std::atomic<bool> m_isRecalc;
    std::atomic<bool> m_restoringFromText; // status indication
    std::atomic<bool> m_forceUnloadModel;
    std::atomic<bool> m_markedForDeletion;
    bool m_isServer;
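These members also hint at what replaced the recalc callback: per the commit message, the prompt path now takes a flag that disallows context shift rather than asking a callback whether to recalculate. A hypothetical generation loop gated this way (m_stopGenerating is the member declared above; allowContextShift, ctx, n_past, n_ctx, n_keep, and shift_context are illustrative names):

    while (!m_stopGenerating) {
        if (n_past >= n_ctx) {                // context window is full
            if (!allowContextShift)
                break;                        // shifting disallowed: stop cleanly
            shift_context(ctx, n_keep, n_past);
            n_past -= (n_past - n_keep) / 2;  // account for the discarded tokens
        }
        // ... decode the next token, run the stop-sequence check, stream text ...
    }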