Show token generation speed in gui. (#1020)

2025-09-06 02:50:36 +00:00 · 2023-06-19 11:34:53 -07:00
parent fd419caa55
commit 2b6cc99a31
5 changed files with 84 additions and 2 deletions
--- a/gpt4all-chat/chat.cpp
+++ b/gpt4all-chat/chat.cpp
@@ -57,6 +57,7 @@ void Chat::connectLLM()
    connect(m_llmodel, &ChatLLM::modelLoadingError, this, &Chat::handleModelLoadingError, Qt::QueuedConnection);
    connect(m_llmodel, &ChatLLM::recalcChanged, this, &Chat::handleRecalculating, Qt::QueuedConnection);
    connect(m_llmodel, &ChatLLM::generatedNameChanged, this, &Chat::generatedNameChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &ChatLLM::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection);

    connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection);
    connect(this, &Chat::modelNameChangeRequested, m_llmodel, &ChatLLM::modelNameChangeRequested, Qt::QueuedConnection);
@@ -102,6 +103,8 @@ void Chat::resetResponseState()
    if (m_responseInProgress && m_responseState == Chat::LocalDocsRetrieval)
        return;

+    m_tokenSpeed = QString();
+    emit tokenSpeedChanged();
    m_responseInProgress = true;
    m_responseState = Chat::LocalDocsRetrieval;
    emit responseInProgressChanged();
@@ -187,6 +190,9 @@ void Chat::promptProcessing()

 void Chat::responseStopped()
 {
+    m_tokenSpeed = QString();
+    emit tokenSpeedChanged();
+
    const QString chatResponse = response();
    QList<QString> references;
    QList<QString> referencesContext;
@@ -336,6 +342,12 @@ void Chat::handleModelLoadingError(const QString &error)
    emit modelLoadingErrorChanged();
 }

+void Chat::handleTokenSpeedChanged(const QString &tokenSpeed) // slot for ChatLLM::reportSpeed (queued connection made in connectLLM)
+{
+    m_tokenSpeed = tokenSpeed; // remember the most recently reported speed string
+    emit tokenSpeedChanged(); // emitted on every report; no check that the value actually differs
+}
+
 bool Chat::serialize(QDataStream &stream, int version) const
 {
    stream << m_creationDate;