From 8c9f26e2494d5e5bbe25d49732b8d703fddb5b8a Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 4 Feb 2025 12:08:17 -0500 Subject: [PATCH] Ignore DeepSeek-R1 "think" content in name/follow-up responses (#3458) Signed-off-by: Jared Van Bortel --- gpt4all-chat/CHANGELOG.md | 1 + gpt4all-chat/src/chat.cpp | 9 +- gpt4all-chat/src/chatllm.cpp | 349 +++++++++++++++++++--------- gpt4all-chat/src/chatllm.h | 3 + gpt4all-chat/src/modellist.h | 2 +- gpt4all-chat/src/toolcallparser.cpp | 30 ++- gpt4all-chat/src/toolcallparser.h | 38 +-- 7 files changed, 295 insertions(+), 137 deletions(-) diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md index 579f73a3..4eec33be 100644 --- a/gpt4all-chat/CHANGELOG.md +++ b/gpt4all-chat/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Fixed - Fix "index N is not a prompt" when using LocalDocs with reasoning ([#3451](https://github.com/nomic-ai/gpt4all/pull/3451) - Work around rendering artifacts on Snapdragon SoCs with Windows ([#3450](https://github.com/nomic-ai/gpt4all/pull/3450)) +- Prevent DeepSeek-R1 reasoning from appearing in chat names and follow-up questions ([#3458](https://github.com/nomic-ai/gpt4all/pull/3458)) ## [3.8.0] - 2025-01-30 diff --git a/gpt4all-chat/src/chat.cpp b/gpt4all-chat/src/chat.cpp index 725d2374..ec0ecf99 100644 --- a/gpt4all-chat/src/chat.cpp +++ b/gpt4all-chat/src/chat.cpp @@ -255,7 +255,7 @@ void Chat::responseStopped(qint64 promptResponseMs) ToolCallParser parser; parser.update(possibleToolcall.toUtf8()); - if (parser.state() == ToolEnums::ParseState::Complete && parser.startTag() != ToolCallConstants::ThinkTag) + if (parser.state() == ToolEnums::ParseState::Complete && parser.startTag() != ToolCallConstants::ThinkStartTag) processToolCall(parser.toolCall()); else responseComplete(); @@ -381,11 +381,8 @@ void Chat::trySwitchContextOfLoadedModel() void Chat::generatedNameChanged(const QString &name) { - // Only use the first three words maximum and remove newlines and extra spaces - m_generatedName = name.simplified(); - QStringList words = m_generatedName.split(' ', Qt::SkipEmptyParts); - int wordCount = qMin(7, words.size()); - m_name = words.mid(0, wordCount).join(' '); + m_generatedName = name; + m_name = name; emit nameChanged(); m_needsSave = true; } diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp index f8ac85a4..8bca1143 100644 --- a/gpt4all-chat/src/chatllm.cpp +++ b/gpt4all-chat/src/chatllm.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,70 @@ static const std::shared_ptr &jinjaEnv() return environment; } +class BaseResponseHandler { +public: + virtual void onSplitIntoTwo (const QString &startTag, const QString &firstBuffer, const QString &secondBuffer) = 0; + virtual void onSplitIntoThree (const QString &secondBuffer, const QString &thirdBuffer) = 0; + // "old-style" responses, with all of the implementation details left in + virtual void onOldResponseChunk(const QByteArray &chunk) = 0; + // notify of a "new-style" response that has been cleaned of tool calling + virtual bool onBufferResponse (const QString &response, int bufferIdx) = 0; + // notify of a "new-style" response, no tool calling applicable + virtual bool onRegularResponse () = 0; + virtual bool getStopGenerating () const = 0; +}; + +static auto promptModelWithTools( + LLModel *model, const LLModel::PromptCallback &promptCallback, BaseResponseHandler &respHandler, + const LLModel::PromptContext &ctx, const QByteArray &prompt, const QStringList &toolNames +) -> std::pair +{ + ToolCallParser toolCallParser(toolNames); + auto handleResponse = [&toolCallParser, &respHandler](LLModel::Token token, std::string_view piece) -> bool { + Q_UNUSED(token) + + toolCallParser.update(piece.data()); + + // Split the response into two if needed + if (toolCallParser.numberOfBuffers() < 2 && toolCallParser.splitIfPossible()) { + const auto parseBuffers = toolCallParser.buffers(); + Q_ASSERT(parseBuffers.size() == 2); + respHandler.onSplitIntoTwo(toolCallParser.startTag(), parseBuffers.at(0), parseBuffers.at(1)); + } + + // Split the response into three if needed + if (toolCallParser.numberOfBuffers() < 3 && toolCallParser.startTag() == ToolCallConstants::ThinkStartTag + && toolCallParser.splitIfPossible()) { + const auto parseBuffers = toolCallParser.buffers(); + Q_ASSERT(parseBuffers.size() == 3); + respHandler.onSplitIntoThree(parseBuffers.at(1), parseBuffers.at(2)); + } + + respHandler.onOldResponseChunk(QByteArray::fromRawData(piece.data(), piece.size())); + + bool ok; + const auto parseBuffers = toolCallParser.buffers(); + if (parseBuffers.size() > 1) { + ok = respHandler.onBufferResponse(parseBuffers.last(), parseBuffers.size() - 1); + } else { + ok = respHandler.onRegularResponse(); + } + if (!ok) + return false; + + const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete + && toolCallParser.startTag() != ToolCallConstants::ThinkStartTag; + + return !shouldExecuteToolCall && !respHandler.getStopGenerating(); + }; + model->prompt(std::string_view(prompt), promptCallback, handleResponse, ctx); + + const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete + && toolCallParser.startTag() != ToolCallConstants::ThinkStartTag; + + return { toolCallParser.buffers(), shouldExecuteToolCall }; +} + class LLModelStore { public: static LLModelStore *globalInstance(); @@ -882,6 +947,62 @@ auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LL }; } +class ChatViewResponseHandler : public BaseResponseHandler { +public: + ChatViewResponseHandler(ChatLLM *cllm, QElapsedTimer *totalTime, ChatLLM::PromptResult *result) + : m_cllm(cllm), m_totalTime(totalTime), m_result(result) {} + + void onSplitIntoTwo(const QString &startTag, const QString &firstBuffer, const QString &secondBuffer) override + { + if (startTag == ToolCallConstants::ThinkStartTag) + m_cllm->m_chatModel->splitThinking({ firstBuffer, secondBuffer }); + else + m_cllm->m_chatModel->splitToolCall({ firstBuffer, secondBuffer }); + } + + void onSplitIntoThree(const QString &secondBuffer, const QString &thirdBuffer) override + { + m_cllm->m_chatModel->endThinking({ secondBuffer, thirdBuffer }, m_totalTime->elapsed()); + } + + void onOldResponseChunk(const QByteArray &chunk) override + { + m_result->responseTokens++; + m_cllm->m_timer->inc(); + m_result->response.append(chunk); + } + + bool onBufferResponse(const QString &response, int bufferIdx) override + { + Q_UNUSED(bufferIdx) + try { + m_cllm->m_chatModel->setResponseValue(response); + } catch (const std::exception &e) { + // We have a try/catch here because the main thread might have removed the response from + // the chatmodel by erasing the conversation during the response... the main thread sets + // m_stopGenerating before doing so, but it doesn't wait after that to reset the chatmodel + Q_ASSERT(m_cllm->m_stopGenerating); + return false; + } + emit m_cllm->responseChanged(); + return true; + } + + bool onRegularResponse() override + { + auto respStr = QString::fromUtf8(m_result->response); + return onBufferResponse(removeLeadingWhitespace(respStr), 0); + } + + bool getStopGenerating() const override + { return m_cllm->m_stopGenerating; } + +private: + ChatLLM *m_cllm; + QElapsedTimer *m_totalTime; + ChatLLM::PromptResult *m_result; +}; + auto ChatLLM::promptInternal( const std::variant, std::string_view> &prompt, const LLModel::PromptContext &ctx, @@ -931,64 +1052,20 @@ auto ChatLLM::promptInternal( QElapsedTimer totalTime; totalTime.start(); + ChatViewResponseHandler respHandler(this, &totalTime, &result); + m_timer->start(); - - ToolCallParser toolCallParser; - auto handleResponse = [this, &result, &toolCallParser, &totalTime](LLModel::Token token, std::string_view piece) -> bool { - Q_UNUSED(token) - result.responseTokens++; - m_timer->inc(); - - toolCallParser.update(piece.data()); - - // Split the response into two if needed and create chat items - if (toolCallParser.numberOfBuffers() < 2 && toolCallParser.splitIfPossible()) { - const auto parseBuffers = toolCallParser.buffers(); - Q_ASSERT(parseBuffers.size() == 2); - if (toolCallParser.startTag() == ToolCallConstants::ThinkTag) - m_chatModel->splitThinking({parseBuffers.at(0), parseBuffers.at(1)}); - else - m_chatModel->splitToolCall({parseBuffers.at(0), parseBuffers.at(1)}); - } - - // Split the response into three if needed and create chat items - if (toolCallParser.numberOfBuffers() < 3 && toolCallParser.startTag() == ToolCallConstants::ThinkTag - && toolCallParser.splitIfPossible()) { - const auto parseBuffers = toolCallParser.buffers(); - Q_ASSERT(parseBuffers.size() == 3); - m_chatModel->endThinking({parseBuffers.at(1), parseBuffers.at(2)}, totalTime.elapsed()); - } - - result.response.append(piece.data(), piece.size()); - auto respStr = QString::fromUtf8(result.response); - - try { - const auto parseBuffers = toolCallParser.buffers(); - if (parseBuffers.size() > 1) - m_chatModel->setResponseValue(parseBuffers.last()); - else - m_chatModel->setResponseValue(removeLeadingWhitespace(respStr)); - } catch (const std::exception &e) { - // We have a try/catch here because the main thread might have removed the response from - // the chatmodel by erasing the conversation during the response... the main thread sets - // m_stopGenerating before doing so, but it doesn't wait after that to reset the chatmodel - Q_ASSERT(m_stopGenerating); - return false; - } - - emit responseChanged(); - - const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete - && toolCallParser.startTag() != ToolCallConstants::ThinkTag; - - return !shouldExecuteToolCall && !m_stopGenerating; - }; - + QStringList finalBuffers; + bool shouldExecuteTool; try { emit promptProcessing(); m_llModelInfo.model->setThreadCount(mySettings->threadCount()); m_stopGenerating = false; - m_llModelInfo.model->prompt(conversation, handlePrompt, handleResponse, ctx); + std::tie(finalBuffers, shouldExecuteTool) = promptModelWithTools( + m_llModelInfo.model.get(), handlePrompt, respHandler, ctx, + QByteArray::fromRawData(conversation.data(), conversation.size()), + ToolCallConstants::AllTagNames + ); } catch (...) { m_timer->stop(); throw; @@ -997,22 +1074,18 @@ auto ChatLLM::promptInternal( m_timer->stop(); qint64 elapsed = totalTime.elapsed(); - const auto parseBuffers = toolCallParser.buffers(); - const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete - && toolCallParser.startTag() != ToolCallConstants::ThinkTag; - // trim trailing whitespace auto respStr = QString::fromUtf8(result.response); - if (!respStr.isEmpty() && (std::as_const(respStr).back().isSpace() || parseBuffers.size() > 1)) { - if (parseBuffers.size() > 1) - m_chatModel->setResponseValue(parseBuffers.last()); + if (!respStr.isEmpty() && (std::as_const(respStr).back().isSpace() || finalBuffers.size() > 1)) { + if (finalBuffers.size() > 1) + m_chatModel->setResponseValue(finalBuffers.last()); else m_chatModel->setResponseValue(respStr.trimmed()); emit responseChanged(); } bool doQuestions = false; - if (!m_isServer && messageItems && !shouldExecuteToolCall) { + if (!m_isServer && messageItems && !shouldExecuteTool) { switch (mySettings->suggestionMode()) { case SuggestionMode::On: doQuestions = true; break; case SuggestionMode::LocalDocsOnly: doQuestions = usedLocalDocs; break; @@ -1090,6 +1163,66 @@ void ChatLLM::reloadModel() loadModel(m); } +// This class throws discards the text within thinking tags, for use with chat names and follow-up questions. +class SimpleResponseHandler : public BaseResponseHandler { +public: + SimpleResponseHandler(ChatLLM *cllm) + : m_cllm(cllm) {} + + void onSplitIntoTwo(const QString &startTag, const QString &firstBuffer, const QString &secondBuffer) override + { /* no-op */ } + + void onSplitIntoThree(const QString &secondBuffer, const QString &thirdBuffer) override + { /* no-op */ } + + void onOldResponseChunk(const QByteArray &chunk) override + { m_response.append(chunk); } + + bool onBufferResponse(const QString &response, int bufferIdx) override + { + if (bufferIdx == 1) + return true; // ignore "think" content + return onSimpleResponse(response); + } + + bool onRegularResponse() override + { return onBufferResponse(QString::fromUtf8(m_response), 0); } + + bool getStopGenerating() const override + { return m_cllm->m_stopGenerating; } + +protected: + virtual bool onSimpleResponse(const QString &response) = 0; + +protected: + ChatLLM *m_cllm; + QByteArray m_response; +}; + +class NameResponseHandler : public SimpleResponseHandler { +private: + // max length of chat names, in words + static constexpr qsizetype MAX_WORDS = 3; + +public: + using SimpleResponseHandler::SimpleResponseHandler; + +protected: + bool onSimpleResponse(const QString &response) override + { + QTextStream stream(const_cast(&response), QIODeviceBase::ReadOnly); + QStringList words; + while (!stream.atEnd() && words.size() < MAX_WORDS) { + QString word; + stream >> word; + words << word; + } + + emit m_cllm->generatedNameChanged(words.join(u' ')); + return words.size() < MAX_WORDS || stream.atEnd(); + } +}; + void ChatLLM::generateName() { Q_ASSERT(isModelLoaded()); @@ -1106,23 +1239,15 @@ void ChatLLM::generateName() return; } - QByteArray response; // raw UTF-8 - - auto handleResponse = [this, &response](LLModel::Token token, std::string_view piece) -> bool { - Q_UNUSED(token) - - response.append(piece.data(), piece.size()); - QStringList words = QString::fromUtf8(response).simplified().split(u' ', Qt::SkipEmptyParts); - emit generatedNameChanged(words.join(u' ')); - return words.size() <= 3; - }; + NameResponseHandler respHandler(this); try { - m_llModelInfo.model->prompt( - applyJinjaTemplate(forkConversation(chatNamePrompt)), - [this](auto &&...) { return !m_stopGenerating; }, - handleResponse, - promptContextFromSettings(m_modelInfo) + promptModelWithTools( + m_llModelInfo.model.get(), + /*promptCallback*/ [this](auto &&...) { return !m_stopGenerating; }, + respHandler, promptContextFromSettings(m_modelInfo), + applyJinjaTemplate(forkConversation(chatNamePrompt)).c_str(), + { ToolCallConstants::ThinkTagName } ); } catch (const std::exception &e) { qWarning() << "ChatLLM failed to generate name:" << e.what(); @@ -1134,13 +1259,43 @@ void ChatLLM::handleChatIdChanged(const QString &id) m_llmThread.setObjectName(id); } -void ChatLLM::generateQuestions(qint64 elapsed) -{ +class QuestionResponseHandler : public SimpleResponseHandler { +public: + using SimpleResponseHandler::SimpleResponseHandler; + +protected: + bool onSimpleResponse(const QString &response) override + { + auto responseUtf8Bytes = response.toUtf8().slice(m_offset); + auto responseUtf8 = std::string(responseUtf8Bytes.begin(), responseUtf8Bytes.end()); + // extract all questions from response + ptrdiff_t lastMatchEnd = -1; + auto it = std::sregex_iterator(responseUtf8.begin(), responseUtf8.end(), s_reQuestion); + auto end = std::sregex_iterator(); + for (; it != end; ++it) { + auto pos = it->position(); + auto len = it->length(); + lastMatchEnd = pos + len; + emit m_cllm->generatedQuestionFinished(QString::fromUtf8(&responseUtf8[pos], len)); + } + + // remove processed input from buffer + if (lastMatchEnd != -1) + m_offset += lastMatchEnd; + return true; + } + +private: // FIXME: This only works with response by the model in english which is not ideal for a multi-language // model. // match whole question sentences - static const std::regex reQuestion(R"(\b(?:What|Where|How|Why|When|Who|Which|Whose|Whom)\b[^?]*\?)"); + static inline const std::regex s_reQuestion { R"(\b(?:What|Where|How|Why|When|Who|Which|Whose|Whom)\b[^?]*\?)" }; + qsizetype m_offset = 0; +}; + +void ChatLLM::generateQuestions(qint64 elapsed) +{ Q_ASSERT(isModelLoaded()); if (!isModelLoaded()) { emit responseStopped(elapsed); @@ -1158,39 +1313,17 @@ void ChatLLM::generateQuestions(qint64 elapsed) emit generatingQuestions(); - std::string response; // raw UTF-8 - - auto handleResponse = [this, &response](LLModel::Token token, std::string_view piece) -> bool { - Q_UNUSED(token) - - // add token to buffer - response.append(piece); - - // extract all questions from response - ptrdiff_t lastMatchEnd = -1; - auto it = std::sregex_iterator(response.begin(), response.end(), reQuestion); - auto end = std::sregex_iterator(); - for (; it != end; ++it) { - auto pos = it->position(); - auto len = it->length(); - lastMatchEnd = pos + len; - emit generatedQuestionFinished(QString::fromUtf8(&response[pos], len)); - } - - // remove processed input from buffer - if (lastMatchEnd != -1) - response.erase(0, lastMatchEnd); - return true; - }; + QuestionResponseHandler respHandler(this); QElapsedTimer totalTime; totalTime.start(); try { - m_llModelInfo.model->prompt( - applyJinjaTemplate(forkConversation(suggestedFollowUpPrompt)), - [this](auto &&...) { return !m_stopGenerating; }, - handleResponse, - promptContextFromSettings(m_modelInfo) + promptModelWithTools( + m_llModelInfo.model.get(), + /*promptCallback*/ [this](auto &&...) { return !m_stopGenerating; }, + respHandler, promptContextFromSettings(m_modelInfo), + applyJinjaTemplate(forkConversation(suggestedFollowUpPrompt)).c_str(), + { ToolCallConstants::ThinkTagName } ); } catch (const std::exception &e) { qWarning() << "ChatLLM failed to generate follow-up questions:" << e.what(); diff --git a/gpt4all-chat/src/chatllm.h b/gpt4all-chat/src/chatllm.h index e34d3899..f071c0ac 100644 --- a/gpt4all-chat/src/chatllm.h +++ b/gpt4all-chat/src/chatllm.h @@ -30,6 +30,7 @@ using namespace Qt::Literals::StringLiterals; +class ChatViewResponseHandler; class QDataStream; // NOTE: values serialized to disk, do not change or reuse @@ -285,6 +286,8 @@ private: bool m_isServer; bool m_forceMetal; bool m_reloadingToChangeVariant; + friend class ChatViewResponseHandler; + friend class SimpleResponseHandler; }; #endif // CHATLLM_H diff --git a/gpt4all-chat/src/modellist.h b/gpt4all-chat/src/modellist.h index 0bcc97b4..f9b1713b 100644 --- a/gpt4all-chat/src/modellist.h +++ b/gpt4all-chat/src/modellist.h @@ -269,7 +269,7 @@ private: std::optional m_chatTemplate; mutable std::optional m_modelChatTemplate; QString m_systemMessage; - QString m_chatNamePrompt = "Describe the above conversation in seven words or less."; + QString m_chatNamePrompt = "Describe the above conversation. Your entire response must be three words or less."; QString m_suggestedFollowUpPrompt = "Suggest three very short factual follow-up questions that have not been answered yet or cannot be found inspired by the previous conversation and excerpts."; friend class MySettings; friend class ModelList; diff --git a/gpt4all-chat/src/toolcallparser.cpp b/gpt4all-chat/src/toolcallparser.cpp index bd456687..2d53401a 100644 --- a/gpt4all-chat/src/toolcallparser.cpp +++ b/gpt4all-chat/src/toolcallparser.cpp @@ -1,17 +1,29 @@ #include "toolcallparser.h" -#include -#include -#include +#include "tool.h" + +#include +#include + +#include -#include ToolCallParser::ToolCallParser() + : ToolCallParser(ToolCallConstants::AllTagNames) + {} + +ToolCallParser::ToolCallParser(const QStringList &tagNames) { - m_possibleStartTags << ToolCallConstants::CodeInterpreterTag.toUtf8() - << ToolCallConstants::ThinkTag.toUtf8(); - m_possibleEndTags << ToolCallConstants::CodeInterpreterEndTag.toUtf8() - << ToolCallConstants::ThinkEndTag.toUtf8(); + QSet firstChars; + for (auto &name : tagNames) { + if (name.isEmpty()) + throw std::invalid_argument("ToolCallParser(): tag names must not be empty"); + if (firstChars.contains(name.at(0))) + throw std::invalid_argument("ToolCallParser(): tag names must not share any prefix"); + firstChars << name.at(0); + m_possibleStartTags << makeStartTag(name).toUtf8(); + m_possibleEndTags << makeEndTag (name).toUtf8(); + } reset(); } @@ -80,7 +92,7 @@ void ToolCallParser::update(const QByteArray &update) { currentBuffer().append(update); - for (size_t i = currentBuffer().size() - update.size(); i < currentBuffer().size(); ++i) { + for (qsizetype i = currentBuffer().size() - update.size(); i < currentBuffer().size(); ++i) { const char c = currentBuffer()[i]; const bool foundMatch = isExpected(c); if (!foundMatch) { diff --git a/gpt4all-chat/src/toolcallparser.h b/gpt4all-chat/src/toolcallparser.h index fb5140cc..4676fa4a 100644 --- a/gpt4all-chat/src/toolcallparser.h +++ b/gpt4all-chat/src/toolcallparser.h @@ -1,30 +1,22 @@ #ifndef TOOLCALLPARSER_H #define TOOLCALLPARSER_H -#include "tool.h" - #include #include #include #include -namespace ToolCallConstants -{ - const QString CodeInterpreterFunction = R"(javascript_interpret)"; - const QString CodeInterpreterTag = R"(<)" + CodeInterpreterFunction + R"(>)"; - const QString CodeInterpreterEndTag = R"()"; - const QString CodeInterpreterPrefix = CodeInterpreterTag + "\n```javascript\n"; - const QString CodeInterpreterSuffix = "```\n" + CodeInterpreterEndTag; +namespace ToolEnums { enum class ParseState; } + +using namespace Qt::Literals::StringLiterals; - // NB: the parsing code assumes the first char of the various tags differ - const QString ThinkTag = QStringLiteral(""); - const QString ThinkEndTag = QStringLiteral(""); -} class ToolCallParser { public: ToolCallParser(); + ToolCallParser(const QStringList &tagNames); + void reset(); void update(const QByteArray &update); QString toolCall() const { return QString::fromUtf8(m_toolCall); } @@ -37,6 +29,9 @@ public: QStringList buffers() const; int numberOfBuffers() const { return m_buffers.size(); } + static QString makeStartTag(const QString &name) { return u"<%1>"_s .arg(name); } + static QString makeEndTag (const QString &name) { return u""_s.arg(name); } + private: QByteArray ¤tBuffer(); void resetSearchState(); @@ -58,4 +53,21 @@ private: int m_endIndex; }; +namespace ToolCallConstants +{ + // NB: the parsing code assumes the first char of the various tags differ + + inline const QString CodeInterpreterFunction = u"javascript_interpret"_s; + inline const QString CodeInterpreterStartTag = ToolCallParser::makeStartTag(CodeInterpreterFunction); + inline const QString CodeInterpreterEndTag = ToolCallParser::makeEndTag (CodeInterpreterFunction); + inline const QString CodeInterpreterPrefix = u"%1\n```javascript\n"_s.arg(CodeInterpreterStartTag); + inline const QString CodeInterpreterSuffix = u"```\n%1"_s .arg(CodeInterpreterEndTag ); + + inline const QString ThinkTagName = u"think"_s; + inline const QString ThinkStartTag = ToolCallParser::makeStartTag(ThinkTagName); + inline const QString ThinkEndTag = ToolCallParser::makeEndTag (ThinkTagName); + + inline const QStringList AllTagNames { CodeInterpreterFunction, ThinkTagName }; +} + #endif // TOOLCALLPARSER_H