persistent threadcount setting

The thread count is now stored on the Settings object and
is reapplied after a model switch.
This commit is contained in:
Aaron Miller 2023-04-24 12:24:55 -07:00 committed by AT
parent ac9d9b9476
commit 6e92d93b53
3 changed files with 29 additions and 6 deletions

13
llm.cpp
View File

@ -101,8 +101,10 @@ bool LLMObject::loadModelPrivate(const QString &modelName)
} }
void LLMObject::setThreadCount(int32_t n_threads) { void LLMObject::setThreadCount(int32_t n_threads) {
if (m_llmodel->threadCount() != n_threads) {
m_llmodel->setThreadCount(n_threads); m_llmodel->setThreadCount(n_threads);
emit threadCountChanged(); emit threadCountChanged();
}
} }
int32_t LLMObject::threadCount() { int32_t LLMObject::threadCount() {
@ -297,6 +299,7 @@ LLM::LLM()
connect(m_llmodel, &LLMObject::modelNameChanged, this, &LLM::modelNameChanged, Qt::QueuedConnection); connect(m_llmodel, &LLMObject::modelNameChanged, this, &LLM::modelNameChanged, Qt::QueuedConnection);
connect(m_llmodel, &LLMObject::modelListChanged, this, &LLM::modelListChanged, Qt::QueuedConnection); connect(m_llmodel, &LLMObject::modelListChanged, this, &LLM::modelListChanged, Qt::QueuedConnection);
connect(m_llmodel, &LLMObject::threadCountChanged, this, &LLM::threadCountChanged, Qt::QueuedConnection); connect(m_llmodel, &LLMObject::threadCountChanged, this, &LLM::threadCountChanged, Qt::QueuedConnection);
connect(m_llmodel, &LLMObject::threadCountChanged, this, &LLM::syncThreadCount, Qt::QueuedConnection);
connect(this, &LLM::promptRequested, m_llmodel, &LLMObject::prompt, Qt::QueuedConnection); connect(this, &LLM::promptRequested, m_llmodel, &LLMObject::prompt, Qt::QueuedConnection);
@ -375,8 +378,16 @@ QList<QString> LLM::modelList() const
return m_llmodel->modelList(); return m_llmodel->modelList();
} }
void LLM::syncThreadCount() {
emit setThreadCountRequested(m_desiredThreadCount);
}
void LLM::setThreadCount(int32_t n_threads) { void LLM::setThreadCount(int32_t n_threads) {
emit setThreadCountRequested(n_threads); if (n_threads <= 0) {
n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
}
m_desiredThreadCount = n_threads;
syncThreadCount();
} }
int32_t LLM::threadCount() { int32_t LLM::threadCount() {

2
llm.h
View File

@ -82,6 +82,7 @@ public:
Q_INVOKABLE void resetResponse(); Q_INVOKABLE void resetResponse();
Q_INVOKABLE void resetContext(); Q_INVOKABLE void resetContext();
Q_INVOKABLE void stopGenerating(); Q_INVOKABLE void stopGenerating();
Q_INVOKABLE void syncThreadCount();
Q_INVOKABLE void setThreadCount(int32_t n_threads); Q_INVOKABLE void setThreadCount(int32_t n_threads);
Q_INVOKABLE int32_t threadCount(); Q_INVOKABLE int32_t threadCount();
@ -116,6 +117,7 @@ private Q_SLOTS:
private: private:
LLMObject *m_llmodel; LLMObject *m_llmodel;
int32_t m_desiredThreadCount;
bool m_responseInProgress; bool m_responseInProgress;
private: private:

View File

@ -31,6 +31,7 @@ Dialog {
property int defaultTopK: 40 property int defaultTopK: 40
property int defaultMaxLength: 4096 property int defaultMaxLength: 4096
property int defaultPromptBatchSize: 9 property int defaultPromptBatchSize: 9
property int defaultThreadCount: 0
property string defaultPromptTemplate: "The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. property string defaultPromptTemplate: "The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt: ### Prompt:
%1 %1
@ -42,6 +43,7 @@ Dialog {
property alias maxLength: settings.maxLength property alias maxLength: settings.maxLength
property alias promptBatchSize: settings.promptBatchSize property alias promptBatchSize: settings.promptBatchSize
property alias promptTemplate: settings.promptTemplate property alias promptTemplate: settings.promptTemplate
property alias threadCount: settings.threadCount
Settings { Settings {
id: settings id: settings
@ -50,6 +52,7 @@ Dialog {
property int topK: settingsDialog.defaultTopK property int topK: settingsDialog.defaultTopK
property int maxLength: settingsDialog.defaultMaxLength property int maxLength: settingsDialog.defaultMaxLength
property int promptBatchSize: settingsDialog.defaultPromptBatchSize property int promptBatchSize: settingsDialog.defaultPromptBatchSize
property int threadCount: settingsDialog.defaultThreadCount
property string promptTemplate: settingsDialog.defaultPromptTemplate property string promptTemplate: settingsDialog.defaultPromptTemplate
} }
@ -60,7 +63,13 @@ Dialog {
settings.maxLength = defaultMaxLength; settings.maxLength = defaultMaxLength;
settings.promptBatchSize = defaultPromptBatchSize; settings.promptBatchSize = defaultPromptBatchSize;
settings.promptTemplate = defaultPromptTemplate; settings.promptTemplate = defaultPromptTemplate;
settings.threadCount = defaultThreadCount
settings.sync() settings.sync()
LLM.threadCount = settings.threadCount;
}
Component.onCompleted: {
LLM.threadCount = settings.threadCount;
} }
Component.onDestruction: { Component.onDestruction: {
@ -264,7 +273,7 @@ Dialog {
Layout.column: 0 Layout.column: 0
} }
TextField { TextField {
text: LLM.threadCount.toString() text: settingsDialog.threadCount.toString()
color: theme.textColor color: theme.textColor
background: Rectangle { background: Rectangle {
implicitWidth: 150 implicitWidth: 150
@ -272,7 +281,7 @@ Dialog {
radius: 10 radius: 10
} }
padding: 10 padding: 10
ToolTip.text: qsTr("Amount of processing threads to use") ToolTip.text: qsTr("Amount of processing threads to use, a setting of 0 will use the lesser of 4 or your number of CPU threads")
ToolTip.visible: hovered ToolTip.visible: hovered
Layout.row: 5 Layout.row: 5
Layout.column: 1 Layout.column: 1
@ -280,10 +289,11 @@ Dialog {
onAccepted: { onAccepted: {
var val = parseInt(text) var val = parseInt(text)
if (!isNaN(val)) { if (!isNaN(val)) {
settingsDialog.threadCount = val
LLM.threadCount = val LLM.threadCount = val
focus = false focus = false
} else { } else {
text = settingsDialog.nThreads.toString() text = settingsDialog.threadCount.toString()
} }
} }
Accessible.role: Accessible.EditableText Accessible.role: Accessible.EditableText