Show token generation speed in gui. (#1020)

This commit is contained in:
AT
2023-06-19 11:34:53 -07:00
committed by GitHub
parent fd419caa55
commit 2b6cc99a31
5 changed files with 84 additions and 2 deletions

View File

@@ -23,6 +23,46 @@ struct LLModelInfo {
// must be able to serialize the information even if it is in the unloaded state
};
// Counts generated tokens and publishes the observed generation speed as a
// "<rate> tokens/sec" string through the report() signal.
//
// start() clears the counters, inc() counts one token (and reports once more
// than 999 ms have passed since the last report), stop() forces a final report.
class TokenTimer : public QObject {
    Q_OBJECT
public:
    explicit TokenTimer(QObject *parent)
        : QObject(parent)
        , m_elapsed(0)
        , m_tokens(0) {} // m_tokens must be initialized: inc()/stop() may run before start()

    // Returns the new average after folding in the n-th sample: multiply the
    // old average by n - 1, add the new number, and divide the total by n.
    // For n < 1 there is no previous sample to weigh, so newNumber is returned
    // as-is (this also avoids a division by zero).
    static int rollingAverage(int oldAvg, int newNumber, int n)
    {
        if (n < 1)
            return newNumber;
        return qRound(((float(oldAvg) * (n - 1)) + newNumber) / float(n));
    }

    // Resets the token count and accumulated time; the clock is invalidated so
    // that it only starts running on the first inc().
    void start() { m_tokens = 0; m_elapsed = 0; m_time.invalidate(); }

    // Emits a final report for the tokens counted so far.
    void stop() { handleTimeout(); }

    // Counts one token, starting the clock on the first call after start().
    void inc() {
        if (!m_time.isValid())
            m_time.start();
        ++m_tokens;
        if (m_time.elapsed() > 999)
            handleTimeout();
    }

Q_SIGNALS:
    // Carries the formatted speed, e.g. "12 tokens/sec".
    void report(const QString &speed);

private Q_SLOTS:
    // Folds the time since the last report into m_elapsed and emits the
    // average rate over the whole run.
    void handleTimeout()
    {
        // restart() on an invalid QElapsedTimer is undefined behaviour; the
        // timer is invalid when stop() is reached without any inc().
        if (m_time.isValid())
            m_elapsed += m_time.restart();

        // Report 0 instead of inf/nan when no time has been accumulated yet.
        const float seconds = float(m_elapsed / 1000.0f);
        const float rate = seconds > 0.0f ? m_tokens / seconds : 0.0f;
        emit report(QString("%1 tokens/sec").arg(rate, 0, 'g', 2));
    }

private:
    QElapsedTimer m_time; // time since the last report (invalid until the first inc())
    qint64 m_elapsed;     // accumulated milliseconds since start()
    quint32 m_tokens;     // tokens counted since start()
};
class Chat;
class ChatLLM : public QObject
{
@@ -73,6 +113,7 @@ public Q_SLOTS:
void generateName();
void handleChatIdChanged();
void handleShouldBeLoadedChanged();
void handleThreadStarted();
Q_SIGNALS:
void isModelLoadedChanged();
@@ -89,7 +130,7 @@ Q_SIGNALS:
void threadStarted();
void shouldBeLoadedChanged();
void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize, QList<ResultInfo> *results);
void reportSpeed(const QString &speed);
protected:
bool handlePrompt(int32_t token);
@@ -112,6 +153,7 @@ protected:
quint32 m_responseLogits;
QString m_modelName;
Chat *m_chat;
TokenTimer *m_timer;
QByteArray m_state;
QThread m_llmThread;
std::atomic<bool> m_stopGenerating;