From 8c9f26e2494d5e5bbe25d49732b8d703fddb5b8a Mon Sep 17 00:00:00 2001
From: Jared Van Bortel <jared@nomic.ai>
Date: Tue, 4 Feb 2025 12:08:17 -0500
Subject: [PATCH] Ignore DeepSeek-R1 "think" content in name/follow-up
 responses (#3458)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
---
 gpt4all-chat/CHANGELOG.md           |   1 +
 gpt4all-chat/src/chat.cpp           |   9 +-
 gpt4all-chat/src/chatllm.cpp        | 349 +++++++++++++++++++---------
 gpt4all-chat/src/chatllm.h          |   3 +
 gpt4all-chat/src/modellist.h        |   2 +-
 gpt4all-chat/src/toolcallparser.cpp |  30 ++-
 gpt4all-chat/src/toolcallparser.h   |  38 +--
 7 files changed, 295 insertions(+), 137 deletions(-)
diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md
index 579f73a3..4eec33be 100644
--- a/gpt4all-chat/CHANGELOG.md
+++ b/gpt4all-chat/CHANGELOG.md
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 ### Fixed
 - Fix "index N is not a prompt" when using LocalDocs with reasoning ([#3451](https://github.com/nomic-ai/gpt4all/pull/3451)
 - Work around rendering artifacts on Snapdragon SoCs with Windows ([#3450](https://github.com/nomic-ai/gpt4all/pull/3450))
+- Prevent DeepSeek-R1 reasoning from appearing in chat names and follow-up questions ([#3458](https://github.com/nomic-ai/gpt4all/pull/3458))
 
 ## [3.8.0] - 2025-01-30
 
diff --git a/gpt4all-chat/src/chat.cpp b/gpt4all-chat/src/chat.cpp
index 725d2374..ec0ecf99 100644
--- a/gpt4all-chat/src/chat.cpp
+++ b/gpt4all-chat/src/chat.cpp
@@ -255,7 +255,7 @@ void Chat::responseStopped(qint64 promptResponseMs)
 
     ToolCallParser parser;
     parser.update(possibleToolcall.toUtf8());
-    if (parser.state() == ToolEnums::ParseState::Complete && parser.startTag() != ToolCallConstants::ThinkTag)
+    if (parser.state() == ToolEnums::ParseState::Complete && parser.startTag() != ToolCallConstants::ThinkStartTag)
         processToolCall(parser.toolCall());
     else
         responseComplete();
@@ -381,11 +381,8 @@ void Chat::trySwitchContextOfLoadedModel()
 
 void Chat::generatedNameChanged(const QString &name)
 {
-    // Only use the first three words maximum and remove newlines and extra spaces
-    m_generatedName = name.simplified();
-    QStringList words = m_generatedName.split(' ', Qt::SkipEmptyParts);
-    int wordCount = qMin(7, words.size());
-    m_name = words.mid(0, wordCount).join(' ');
+    m_generatedName = name;
+    m_name = name;
     emit nameChanged();
     m_needsSave = true;
 }
diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp
index f8ac85a4..8bca1143 100644
--- a/gpt4all-chat/src/chatllm.cpp
+++ b/gpt4all-chat/src/chatllm.cpp
@@ -29,6 +29,7 @@
 #include <QRegularExpression>
 #include <QRegularExpressionMatch>
 #include <QSet>
+#include <QTextStream>
 #include <QUrl>
 #include <QWaitCondition>
 #include <Qt>
@@ -92,6 +93,70 @@ static const std::shared_ptr<minja::Context> &jinjaEnv()
     return environment;
 }
 
+class BaseResponseHandler {
+public:
+    virtual void onSplitIntoTwo    (const QString &startTag, const QString &firstBuffer, const QString &secondBuffer) = 0;
+    virtual void onSplitIntoThree  (const QString &secondBuffer, const QString &thirdBuffer)                          = 0;
+    // "old-style" responses, with all of the implementation details left in
+    virtual void onOldResponseChunk(const QByteArray &chunk)                                                          = 0;
+    // notify of a "new-style" response that has been cleaned of tool calling
+    virtual bool onBufferResponse  (const QString &response, int bufferIdx)                                           = 0;
+    // notify of a "new-style" response, no tool calling applicable
+    virtual bool onRegularResponse ()                                                                                 = 0;
+    virtual bool getStopGenerating () const                                                                           = 0;
+};
+
+static auto promptModelWithTools(
+    LLModel *model, const LLModel::PromptCallback &promptCallback, BaseResponseHandler &respHandler,
+    const LLModel::PromptContext &ctx, const QByteArray &prompt, const QStringList &toolNames
+) -> std::pair<QStringList, bool>
+{
+    ToolCallParser toolCallParser(toolNames);
+    auto handleResponse = [&toolCallParser, &respHandler](LLModel::Token token, std::string_view piece) -> bool {
+        Q_UNUSED(token)
+
+        toolCallParser.update(piece.data());
+
+        // Split the response into two if needed
+        if (toolCallParser.numberOfBuffers() < 2 && toolCallParser.splitIfPossible()) {
+            const auto parseBuffers = toolCallParser.buffers();
+            Q_ASSERT(parseBuffers.size() == 2);
+            respHandler.onSplitIntoTwo(toolCallParser.startTag(), parseBuffers.at(0), parseBuffers.at(1));
+        }
+
+        // Split the response into three if needed
+        if (toolCallParser.numberOfBuffers() < 3 && toolCallParser.startTag() == ToolCallConstants::ThinkStartTag
+            && toolCallParser.splitIfPossible()) {
+            const auto parseBuffers = toolCallParser.buffers();
+            Q_ASSERT(parseBuffers.size() == 3);
+            respHandler.onSplitIntoThree(parseBuffers.at(1), parseBuffers.at(2));
+        }
+
+        respHandler.onOldResponseChunk(QByteArray::fromRawData(piece.data(), piece.size()));
+
+        bool ok;
+        const auto parseBuffers = toolCallParser.buffers();
+        if (parseBuffers.size() > 1) {
+            ok = respHandler.onBufferResponse(parseBuffers.last(), parseBuffers.size() - 1);
+        } else {
+            ok = respHandler.onRegularResponse();
+        }
+        if (!ok)
+            return false;
+
+        const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete
+            && toolCallParser.startTag() != ToolCallConstants::ThinkStartTag;
+
+        return !shouldExecuteToolCall && !respHandler.getStopGenerating();
+    };
+    model->prompt(std::string_view(prompt), promptCallback, handleResponse, ctx);
+
+    const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete
+        && toolCallParser.startTag() != ToolCallConstants::ThinkStartTag;
+
+    return { toolCallParser.buffers(), shouldExecuteToolCall };
+}
+
 class LLModelStore {
 public:
     static LLModelStore *globalInstance();
@@ -882,6 +947,62 @@ auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LL
     };
 }
 
+class ChatViewResponseHandler : public BaseResponseHandler {
+public:
+    ChatViewResponseHandler(ChatLLM *cllm, QElapsedTimer *totalTime, ChatLLM::PromptResult *result)
+        : m_cllm(cllm), m_totalTime(totalTime), m_result(result) {}
+
+    void onSplitIntoTwo(const QString &startTag, const QString &firstBuffer, const QString &secondBuffer) override
+    {
+        if (startTag == ToolCallConstants::ThinkStartTag)
+            m_cllm->m_chatModel->splitThinking({ firstBuffer, secondBuffer });
+        else
+            m_cllm->m_chatModel->splitToolCall({ firstBuffer, secondBuffer });
+    }
+
+    void onSplitIntoThree(const QString &secondBuffer, const QString &thirdBuffer) override
+    {
+        m_cllm->m_chatModel->endThinking({ secondBuffer, thirdBuffer }, m_totalTime->elapsed());
+    }
+
+    void onOldResponseChunk(const QByteArray &chunk) override
+    {
+        m_result->responseTokens++;
+        m_cllm->m_timer->inc();
+        m_result->response.append(chunk);
+    }
+
+    bool onBufferResponse(const QString &response, int bufferIdx) override
+    {
+        Q_UNUSED(bufferIdx)
+        try {
+            m_cllm->m_chatModel->setResponseValue(response);
+        } catch (const std::exception &e) {
+            // We have a try/catch here because the main thread might have removed the response from
+            // the chatmodel by erasing the conversation during the response... the main thread sets
+            // m_stopGenerating before doing so, but it doesn't wait after that to reset the chatmodel
+            Q_ASSERT(m_cllm->m_stopGenerating);
+            return false;
+        }
+        emit m_cllm->responseChanged();
+        return true;
+    }
+
+    bool onRegularResponse() override
+    {
+        auto respStr = QString::fromUtf8(m_result->response);
+        return onBufferResponse(removeLeadingWhitespace(respStr), 0);
+    }
+
+    bool getStopGenerating() const override
+    { return m_cllm->m_stopGenerating; }
+
+private:
+    ChatLLM               *m_cllm;
+    QElapsedTimer         *m_totalTime;
+    ChatLLM::PromptResult *m_result;
+};
+
 auto ChatLLM::promptInternal(
     const std::variant<std::span<const MessageItem>, std::string_view> &prompt,
     const LLModel::PromptContext &ctx,
@@ -931,64 +1052,20 @@ auto ChatLLM::promptInternal(
 
     QElapsedTimer totalTime;
     totalTime.start();
+    ChatViewResponseHandler respHandler(this, &totalTime, &result);
+
     m_timer->start();
-
-    ToolCallParser toolCallParser;
-    auto handleResponse = [this, &result, &toolCallParser, &totalTime](LLModel::Token token, std::string_view piece) -> bool {
-        Q_UNUSED(token)
-        result.responseTokens++;
-        m_timer->inc();
-
-        toolCallParser.update(piece.data());
-
-        // Split the response into two if needed and create chat items
-        if (toolCallParser.numberOfBuffers() < 2 && toolCallParser.splitIfPossible()) {
-            const auto parseBuffers = toolCallParser.buffers();
-            Q_ASSERT(parseBuffers.size() == 2);
-            if (toolCallParser.startTag() == ToolCallConstants::ThinkTag)
-                m_chatModel->splitThinking({parseBuffers.at(0), parseBuffers.at(1)});
-            else
-                m_chatModel->splitToolCall({parseBuffers.at(0), parseBuffers.at(1)});
-        }
-
-        // Split the response into three if needed and create chat items
-        if (toolCallParser.numberOfBuffers() < 3 && toolCallParser.startTag() == ToolCallConstants::ThinkTag
-            && toolCallParser.splitIfPossible()) {
-            const auto parseBuffers = toolCallParser.buffers();
-            Q_ASSERT(parseBuffers.size() == 3);
-            m_chatModel->endThinking({parseBuffers.at(1), parseBuffers.at(2)}, totalTime.elapsed());
-        }
-
-        result.response.append(piece.data(), piece.size());
-        auto respStr = QString::fromUtf8(result.response);
-
-        try {
-            const auto parseBuffers = toolCallParser.buffers();
-            if (parseBuffers.size() > 1)
-                m_chatModel->setResponseValue(parseBuffers.last());
-            else
-                m_chatModel->setResponseValue(removeLeadingWhitespace(respStr));
-        } catch (const std::exception &e) {
-            // We have a try/catch here because the main thread might have removed the response from
-            // the chatmodel by erasing the conversation during the response... the main thread sets
-            // m_stopGenerating before doing so, but it doesn't wait after that to reset the chatmodel
-            Q_ASSERT(m_stopGenerating);
-            return false;
-        }
-
-        emit responseChanged();
-
-        const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete
-            && toolCallParser.startTag() != ToolCallConstants::ThinkTag;
-
-        return !shouldExecuteToolCall && !m_stopGenerating;
-    };
-
+    QStringList finalBuffers;
+    bool        shouldExecuteTool;
     try {
         emit promptProcessing();
         m_llModelInfo.model->setThreadCount(mySettings->threadCount());
         m_stopGenerating = false;
-        m_llModelInfo.model->prompt(conversation, handlePrompt, handleResponse, ctx);
+        std::tie(finalBuffers, shouldExecuteTool) = promptModelWithTools(
+            m_llModelInfo.model.get(), handlePrompt, respHandler, ctx,
+            QByteArray::fromRawData(conversation.data(), conversation.size()),
+            ToolCallConstants::AllTagNames
+        );
     } catch (...) {
         m_timer->stop();
         throw;
@@ -997,22 +1074,18 @@ auto ChatLLM::promptInternal(
     m_timer->stop();
     qint64 elapsed = totalTime.elapsed();
 
-    const auto parseBuffers = toolCallParser.buffers();
-    const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete
-        && toolCallParser.startTag() != ToolCallConstants::ThinkTag;
-
     // trim trailing whitespace
     auto respStr = QString::fromUtf8(result.response);
-    if (!respStr.isEmpty() && (std::as_const(respStr).back().isSpace() || parseBuffers.size() > 1)) {
-        if (parseBuffers.size() > 1)
-            m_chatModel->setResponseValue(parseBuffers.last());
+    if (!respStr.isEmpty() && (std::as_const(respStr).back().isSpace() || finalBuffers.size() > 1)) {
+        if (finalBuffers.size() > 1)
+            m_chatModel->setResponseValue(finalBuffers.last());
         else
             m_chatModel->setResponseValue(respStr.trimmed());
         emit responseChanged();
     }
 
     bool doQuestions = false;
-    if (!m_isServer && messageItems && !shouldExecuteToolCall) {
+    if (!m_isServer && messageItems && !shouldExecuteTool) {
         switch (mySettings->suggestionMode()) {
             case SuggestionMode::On:            doQuestions = true;          break;
             case SuggestionMode::LocalDocsOnly: doQuestions = usedLocalDocs; break;
@@ -1090,6 +1163,66 @@ void ChatLLM::reloadModel()
         loadModel(m);
 }
 
+// This class throws discards the text within thinking tags, for use with chat names and follow-up questions.
+class SimpleResponseHandler : public BaseResponseHandler {
+public:
+    SimpleResponseHandler(ChatLLM *cllm)
+        : m_cllm(cllm) {}
+
+    void onSplitIntoTwo(const QString &startTag, const QString &firstBuffer, const QString &secondBuffer) override
+    { /* no-op */ }
+
+    void onSplitIntoThree(const QString &secondBuffer, const QString &thirdBuffer) override
+    { /* no-op */ }
+
+    void onOldResponseChunk(const QByteArray &chunk) override
+    { m_response.append(chunk); }
+
+    bool onBufferResponse(const QString &response, int bufferIdx) override
+    {
+        if (bufferIdx == 1)
+            return true; // ignore "think" content
+        return onSimpleResponse(response);
+    }
+
+    bool onRegularResponse() override
+    { return onBufferResponse(QString::fromUtf8(m_response), 0); }
+
+    bool getStopGenerating() const override
+    { return m_cllm->m_stopGenerating; }
+
+protected:
+    virtual bool onSimpleResponse(const QString &response) = 0;
+
+protected:
+    ChatLLM    *m_cllm;
+    QByteArray  m_response;
+};
+
+class NameResponseHandler : public SimpleResponseHandler {
+private:
+    // max length of chat names, in words
+    static constexpr qsizetype MAX_WORDS = 3;
+
+public:
+    using SimpleResponseHandler::SimpleResponseHandler;
+
+protected:
+    bool onSimpleResponse(const QString &response) override
+    {
+        QTextStream stream(const_cast<QString *>(&response), QIODeviceBase::ReadOnly);
+        QStringList words;
+        while (!stream.atEnd() && words.size() < MAX_WORDS) {
+            QString word;
+            stream >> word;
+            words << word;
+        }
+
+        emit m_cllm->generatedNameChanged(words.join(u' '));
+        return words.size() < MAX_WORDS || stream.atEnd();
+    }
+};
+
 void ChatLLM::generateName()
 {
     Q_ASSERT(isModelLoaded());
@@ -1106,23 +1239,15 @@ void ChatLLM::generateName()
         return;
     }
 
-    QByteArray response; // raw UTF-8
-
-    auto handleResponse = [this, &response](LLModel::Token token, std::string_view piece) -> bool {
-        Q_UNUSED(token)
-
-        response.append(piece.data(), piece.size());
-        QStringList words = QString::fromUtf8(response).simplified().split(u' ', Qt::SkipEmptyParts);
-        emit generatedNameChanged(words.join(u' '));
-        return words.size() <= 3;
-    };
+    NameResponseHandler respHandler(this);
 
     try {
-        m_llModelInfo.model->prompt(
-            applyJinjaTemplate(forkConversation(chatNamePrompt)),
-            [this](auto &&...) { return !m_stopGenerating; },
-            handleResponse,
-            promptContextFromSettings(m_modelInfo)
+        promptModelWithTools(
+            m_llModelInfo.model.get(),
+            /*promptCallback*/ [this](auto &&...) { return !m_stopGenerating; },
+            respHandler, promptContextFromSettings(m_modelInfo),
+            applyJinjaTemplate(forkConversation(chatNamePrompt)).c_str(),
+            { ToolCallConstants::ThinkTagName }
         );
     } catch (const std::exception &e) {
         qWarning() << "ChatLLM failed to generate name:" << e.what();
@@ -1134,13 +1259,43 @@ void ChatLLM::handleChatIdChanged(const QString &id)
     m_llmThread.setObjectName(id);
 }
 
-void ChatLLM::generateQuestions(qint64 elapsed)
-{
+class QuestionResponseHandler : public SimpleResponseHandler {
+public:
+    using SimpleResponseHandler::SimpleResponseHandler;
+
+protected:
+    bool onSimpleResponse(const QString &response) override
+    {
+        auto responseUtf8Bytes = response.toUtf8().slice(m_offset);
+        auto responseUtf8 = std::string(responseUtf8Bytes.begin(), responseUtf8Bytes.end());
+        // extract all questions from response
+        ptrdiff_t lastMatchEnd = -1;
+        auto it = std::sregex_iterator(responseUtf8.begin(), responseUtf8.end(), s_reQuestion);
+        auto end = std::sregex_iterator();
+        for (; it != end; ++it) {
+            auto pos = it->position();
+            auto len = it->length();
+            lastMatchEnd = pos + len;
+            emit m_cllm->generatedQuestionFinished(QString::fromUtf8(&responseUtf8[pos], len));
+        }
+
+        // remove processed input from buffer
+        if (lastMatchEnd != -1)
+            m_offset += lastMatchEnd;
+        return true;
+    }
+
+private:
     // FIXME: This only works with response by the model in english which is not ideal for a multi-language
     // model.
     // match whole question sentences
-    static const std::regex reQuestion(R"(\b(?:What|Where|How|Why|When|Who|Which|Whose|Whom)\b[^?]*\?)");
+    static inline const std::regex s_reQuestion { R"(\b(?:What|Where|How|Why|When|Who|Which|Whose|Whom)\b[^?]*\?)" };
 
+    qsizetype m_offset = 0;
+};
+
+void ChatLLM::generateQuestions(qint64 elapsed)
+{
     Q_ASSERT(isModelLoaded());
     if (!isModelLoaded()) {
         emit responseStopped(elapsed);
@@ -1158,39 +1313,17 @@ void ChatLLM::generateQuestions(qint64 elapsed)
 
     emit generatingQuestions();
 
-    std::string response; // raw UTF-8
-
-    auto handleResponse = [this, &response](LLModel::Token token, std::string_view piece) -> bool {
-        Q_UNUSED(token)
-
-        // add token to buffer
-        response.append(piece);
-
-        // extract all questions from response
-        ptrdiff_t lastMatchEnd = -1;
-        auto it = std::sregex_iterator(response.begin(), response.end(), reQuestion);
-        auto end = std::sregex_iterator();
-        for (; it != end; ++it) {
-            auto pos = it->position();
-            auto len = it->length();
-            lastMatchEnd = pos + len;
-            emit generatedQuestionFinished(QString::fromUtf8(&response[pos], len));
-        }
-
-        // remove processed input from buffer
-        if (lastMatchEnd != -1)
-            response.erase(0, lastMatchEnd);
-        return true;
-    };
+    QuestionResponseHandler respHandler(this);
 
     QElapsedTimer totalTime;
     totalTime.start();
     try {
-        m_llModelInfo.model->prompt(
-            applyJinjaTemplate(forkConversation(suggestedFollowUpPrompt)),
-            [this](auto &&...) { return !m_stopGenerating; },
-            handleResponse,
-            promptContextFromSettings(m_modelInfo)
+        promptModelWithTools(
+            m_llModelInfo.model.get(),
+            /*promptCallback*/ [this](auto &&...) { return !m_stopGenerating; },
+            respHandler, promptContextFromSettings(m_modelInfo),
+            applyJinjaTemplate(forkConversation(suggestedFollowUpPrompt)).c_str(),
+            { ToolCallConstants::ThinkTagName }
         );
     } catch (const std::exception &e) {
         qWarning() << "ChatLLM failed to generate follow-up questions:" << e.what();
diff --git a/gpt4all-chat/src/chatllm.h b/gpt4all-chat/src/chatllm.h
index e34d3899..f071c0ac 100644
--- a/gpt4all-chat/src/chatllm.h
+++ b/gpt4all-chat/src/chatllm.h
@@ -30,6 +30,7 @@
 
 using namespace Qt::Literals::StringLiterals;
 
+class ChatViewResponseHandler;
 class QDataStream;
 
 // NOTE: values serialized to disk, do not change or reuse
@@ -285,6 +286,8 @@ private:
     bool m_isServer;
     bool m_forceMetal;
     bool m_reloadingToChangeVariant;
+    friend class ChatViewResponseHandler;
+    friend class SimpleResponseHandler;
 };
 
 #endif // CHATLLM_H
diff --git a/gpt4all-chat/src/modellist.h b/gpt4all-chat/src/modellist.h
index 0bcc97b4..f9b1713b 100644
--- a/gpt4all-chat/src/modellist.h
+++ b/gpt4all-chat/src/modellist.h
@@ -269,7 +269,7 @@ private:
             std::optional<QString> m_chatTemplate;
     mutable std::optional<QString> m_modelChatTemplate;
     QString m_systemMessage;
-    QString m_chatNamePrompt          = "Describe the above conversation in seven words or less.";
+    QString m_chatNamePrompt          = "Describe the above conversation. Your entire response must be three words or less.";
     QString m_suggestedFollowUpPrompt = "Suggest three very short factual follow-up questions that have not been answered yet or cannot be found inspired by the previous conversation and excerpts.";
     friend class MySettings;
     friend class ModelList;
diff --git a/gpt4all-chat/src/toolcallparser.cpp b/gpt4all-chat/src/toolcallparser.cpp
index bd456687..2d53401a 100644
--- a/gpt4all-chat/src/toolcallparser.cpp
+++ b/gpt4all-chat/src/toolcallparser.cpp
@@ -1,17 +1,29 @@
 #include "toolcallparser.h"
 
-#include <QDebug>
-#include <QtGlobal>
-#include <QtLogging>
+#include "tool.h"
+
+#include <QSet>
+#include <QtGlobal>
+
+#include <stdexcept>
 
-#include <cstddef>
 
 ToolCallParser::ToolCallParser()
+    : ToolCallParser(ToolCallConstants::AllTagNames)
+    {}
+
+ToolCallParser::ToolCallParser(const QStringList &tagNames)
 {
-    m_possibleStartTags << ToolCallConstants::CodeInterpreterTag.toUtf8()
-                        << ToolCallConstants::ThinkTag.toUtf8();
-    m_possibleEndTags   << ToolCallConstants::CodeInterpreterEndTag.toUtf8()
-                        << ToolCallConstants::ThinkEndTag.toUtf8();
+    QSet<QChar> firstChars;
+    for (auto &name : tagNames) {
+        if (name.isEmpty())
+            throw std::invalid_argument("ToolCallParser(): tag names must not be empty");
+        if (firstChars.contains(name.at(0)))
+            throw std::invalid_argument("ToolCallParser(): tag names must not share any prefix");
+        firstChars << name.at(0);
+        m_possibleStartTags << makeStartTag(name).toUtf8();
+        m_possibleEndTags   << makeEndTag  (name).toUtf8();
+    }
     reset();
 }
 
@@ -80,7 +92,7 @@ void ToolCallParser::update(const QByteArray &update)
 {
     currentBuffer().append(update);
 
-    for (size_t i = currentBuffer().size() - update.size(); i < currentBuffer().size(); ++i) {
+    for (qsizetype i = currentBuffer().size() - update.size(); i < currentBuffer().size(); ++i) {
         const char c = currentBuffer()[i];
         const bool foundMatch = isExpected(c);
         if (!foundMatch) {
diff --git a/gpt4all-chat/src/toolcallparser.h b/gpt4all-chat/src/toolcallparser.h
index fb5140cc..4676fa4a 100644
--- a/gpt4all-chat/src/toolcallparser.h
+++ b/gpt4all-chat/src/toolcallparser.h
@@ -1,30 +1,22 @@
 #ifndef TOOLCALLPARSER_H
 #define TOOLCALLPARSER_H
 
-#include "tool.h"
-
 #include <QByteArray>
 #include <QList>
 #include <QString>
 #include <QStringList>
 
-namespace ToolCallConstants
-{
-    const QString CodeInterpreterFunction = R"(javascript_interpret)";
-    const QString CodeInterpreterTag = R"(<)" + CodeInterpreterFunction + R"(>)";
-    const QString CodeInterpreterEndTag = R"(</)" + CodeInterpreterFunction + R"(>)";
-    const QString CodeInterpreterPrefix = CodeInterpreterTag + "\n```javascript\n";
-    const QString CodeInterpreterSuffix = "```\n" + CodeInterpreterEndTag;
+namespace ToolEnums { enum class ParseState; }
+
+using namespace Qt::Literals::StringLiterals;
 
-    // NB: the parsing code assumes the first char of the various tags differ
-    const QString ThinkTag = QStringLiteral("<think>");
-    const QString ThinkEndTag = QStringLiteral("</think>");
-}
 
 class ToolCallParser
 {
 public:
     ToolCallParser();
+    ToolCallParser(const QStringList &tagNames);
+
     void reset();
     void update(const QByteArray &update);
     QString toolCall() const { return QString::fromUtf8(m_toolCall); }
@@ -37,6 +29,9 @@ public:
     QStringList buffers() const;
     int numberOfBuffers() const { return m_buffers.size(); }
 
+    static QString makeStartTag(const QString &name) { return u"<%1>"_s .arg(name); }
+    static QString makeEndTag  (const QString &name) { return u"</%1>"_s.arg(name); }
+
 private:
     QByteArray &currentBuffer();
     void resetSearchState();
@@ -58,4 +53,21 @@ private:
     int m_endIndex;
 };
 
+namespace ToolCallConstants
+{
+    // NB: the parsing code assumes the first char of the various tags differ
+
+    inline const QString CodeInterpreterFunction = u"javascript_interpret"_s;
+    inline const QString CodeInterpreterStartTag = ToolCallParser::makeStartTag(CodeInterpreterFunction);
+    inline const QString CodeInterpreterEndTag   = ToolCallParser::makeEndTag  (CodeInterpreterFunction);
+    inline const QString CodeInterpreterPrefix   = u"%1\n```javascript\n"_s.arg(CodeInterpreterStartTag);
+    inline const QString CodeInterpreterSuffix   = u"```\n%1"_s            .arg(CodeInterpreterEndTag  );
+
+    inline const QString ThinkTagName  = u"think"_s;
+    inline const QString ThinkStartTag = ToolCallParser::makeStartTag(ThinkTagName);
+    inline const QString ThinkEndTag   = ToolCallParser::makeEndTag  (ThinkTagName);
+
+    inline const QStringList AllTagNames { CodeInterpreterFunction, ThinkTagName };
+}
+
 #endif // TOOLCALLPARSER_H