mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-05-05 15:07:18 +00:00
chat: fix emoji corruption (#3443)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
f8f78c6677
commit
1f2712d57c
@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
- Code Interpreter: Fix console.log not accepting a single string after v3.7.0 ([#3426](https://github.com/nomic-ai/gpt4all/pull/3426))
|
||||
- Fix Phi 3.1 Mini 128K Instruct template (by [@ThiloteE](https://github.com/ThiloteE) in [#3412](https://github.com/nomic-ai/gpt4all/pull/3412))
|
||||
- Don't block the gui thread for reasoning ([#3435](https://github.com/nomic-ai/gpt4all/pull/3435))
|
||||
- Fix corruption of unicode in output of reasoning models ([#3443](https://github.com/nomic-ai/gpt4all/pull/3443))
|
||||
|
||||
## [3.7.0] - 2025-01-21
|
||||
|
||||
|
@ -254,7 +254,7 @@ void Chat::responseStopped(qint64 promptResponseMs)
|
||||
});
|
||||
|
||||
ToolCallParser parser;
|
||||
parser.update(possibleToolcall);
|
||||
parser.update(possibleToolcall.toUtf8());
|
||||
if (parser.state() == ToolEnums::ParseState::Complete && parser.startTag() != ToolCallConstants::ThinkTag)
|
||||
processToolCall(parser.toolCall());
|
||||
else
|
||||
|
@ -938,13 +938,11 @@ auto ChatLLM::promptInternal(
|
||||
result.responseTokens++;
|
||||
m_timer->inc();
|
||||
|
||||
// FIXME: This is *not* necessarily fully formed utf data because it can be partial at this point
|
||||
// handle this like below where we have a QByteArray
|
||||
toolCallParser.update(QString::fromStdString(piece.data()));
|
||||
toolCallParser.update(piece.data());
|
||||
|
||||
// Split the response into two if needed and create chat items
|
||||
if (toolCallParser.numberOfBuffers() < 2 && toolCallParser.splitIfPossible()) {
|
||||
const QVector<QString> &parseBuffers = toolCallParser.buffers();
|
||||
const auto parseBuffers = toolCallParser.buffers();
|
||||
Q_ASSERT(parseBuffers.size() == 2);
|
||||
if (toolCallParser.startTag() == ToolCallConstants::ThinkTag)
|
||||
m_chatModel->splitThinking({parseBuffers.at(0), parseBuffers.at(1)});
|
||||
@ -955,7 +953,7 @@ auto ChatLLM::promptInternal(
|
||||
// Split the response into three if needed and create chat items
|
||||
if (toolCallParser.numberOfBuffers() < 3 && toolCallParser.startTag() == ToolCallConstants::ThinkTag
|
||||
&& toolCallParser.splitIfPossible()) {
|
||||
const QVector<QString> &parseBuffers = toolCallParser.buffers();
|
||||
const auto parseBuffers = toolCallParser.buffers();
|
||||
Q_ASSERT(parseBuffers.size() == 3);
|
||||
m_chatModel->endThinking({parseBuffers.at(1), parseBuffers.at(2)}, totalTime.elapsed());
|
||||
}
|
||||
@ -964,7 +962,7 @@ auto ChatLLM::promptInternal(
|
||||
auto respStr = QString::fromUtf8(result.response);
|
||||
|
||||
try {
|
||||
const QVector<QString> &parseBuffers = toolCallParser.buffers();
|
||||
const auto parseBuffers = toolCallParser.buffers();
|
||||
if (parseBuffers.size() > 1)
|
||||
m_chatModel->setResponseValue(parseBuffers.last());
|
||||
else
|
||||
@ -998,7 +996,7 @@ auto ChatLLM::promptInternal(
|
||||
m_timer->stop();
|
||||
qint64 elapsed = totalTime.elapsed();
|
||||
|
||||
const QVector<QString> &parseBuffers = toolCallParser.buffers();
|
||||
const auto parseBuffers = toolCallParser.buffers();
|
||||
const bool shouldExecuteToolCall = toolCallParser.state() == ToolEnums::ParseState::Complete
|
||||
&& toolCallParser.startTag() != ToolCallConstants::ThinkTag;
|
||||
|
||||
|
@ -265,7 +265,7 @@ public:
|
||||
if (type() == Type::Response) {
|
||||
// We parse if this contains any part of a partial toolcall
|
||||
ToolCallParser parser;
|
||||
parser.update(value);
|
||||
parser.update(value.toUtf8());
|
||||
|
||||
// If no tool call is detected, return the original value
|
||||
if (parser.startIndex() < 0)
|
||||
@ -292,7 +292,7 @@ public:
|
||||
QString thinkContent(const QString &value) const
|
||||
{
|
||||
ToolCallParser parser;
|
||||
parser.update(value);
|
||||
parser.update(value.toUtf8());
|
||||
|
||||
// Extract the content
|
||||
QString content = parser.toolCall();
|
||||
@ -303,7 +303,7 @@ public:
|
||||
QString toolCallContent(const QString &value) const
|
||||
{
|
||||
ToolCallParser parser;
|
||||
parser.update(value);
|
||||
parser.update(value.toUtf8());
|
||||
|
||||
// Extract the code
|
||||
QString code = parser.toolCall();
|
||||
|
@ -8,10 +8,10 @@
|
||||
|
||||
ToolCallParser::ToolCallParser()
|
||||
{
|
||||
m_possibleStartTags << ToolCallConstants::CodeInterpreterTag
|
||||
<< ToolCallConstants::ThinkTag;
|
||||
m_possibleEndTags << ToolCallConstants::CodeInterpreterEndTag
|
||||
<< ToolCallConstants::ThinkEndTag;
|
||||
m_possibleStartTags << ToolCallConstants::CodeInterpreterTag.toUtf8()
|
||||
<< ToolCallConstants::ThinkTag.toUtf8();
|
||||
m_possibleEndTags << ToolCallConstants::CodeInterpreterEndTag.toUtf8()
|
||||
<< ToolCallConstants::ThinkEndTag.toUtf8();
|
||||
reset();
|
||||
}
|
||||
|
||||
@ -22,7 +22,7 @@ void ToolCallParser::reset()
|
||||
|
||||
// These are global states maintained between update calls
|
||||
m_buffers.clear();
|
||||
m_buffers.append(QString());
|
||||
m_buffers << QByteArray();
|
||||
}
|
||||
|
||||
void ToolCallParser::resetSearchState()
|
||||
@ -40,35 +40,35 @@ void ToolCallParser::resetSearchState()
|
||||
m_endIndex = -1;
|
||||
}
|
||||
|
||||
bool ToolCallParser::isExpected(QChar c) const
|
||||
bool ToolCallParser::isExpected(char c) const
|
||||
{
|
||||
return m_expected.isEmpty() || m_expected.contains(c);
|
||||
}
|
||||
|
||||
void ToolCallParser::setExpected(const QStringList &tags)
|
||||
void ToolCallParser::setExpected(const QList<QByteArray> &tags)
|
||||
{
|
||||
m_expected.clear();
|
||||
for (const QString &tag : tags) {
|
||||
for (const auto &tag : tags) {
|
||||
Q_ASSERT(tag.size() > m_expectedIndex);
|
||||
m_expected << tag.at(m_expectedIndex);
|
||||
}
|
||||
}
|
||||
|
||||
QString ToolCallParser::startTag() const
|
||||
QByteArray ToolCallParser::startTag() const
|
||||
{
|
||||
if (m_currentTagIndex < 0)
|
||||
return QString();
|
||||
return {};
|
||||
return m_possibleStartTags.at(m_currentTagIndex);
|
||||
}
|
||||
|
||||
QString ToolCallParser::endTag() const
|
||||
QByteArray ToolCallParser::endTag() const
|
||||
{
|
||||
if (m_currentTagIndex < 0)
|
||||
return QString();
|
||||
return {};
|
||||
return m_possibleEndTags.at(m_currentTagIndex);
|
||||
}
|
||||
|
||||
QString &ToolCallParser::currentBuffer()
|
||||
QByteArray &ToolCallParser::currentBuffer()
|
||||
{
|
||||
return m_buffers.last();
|
||||
}
|
||||
@ -76,12 +76,12 @@ QString &ToolCallParser::currentBuffer()
|
||||
// This method is called with an arbitrary string and a current state. This method should take the
|
||||
// current state into account and then parse through the update character by character to arrive at
|
||||
// the new state.
|
||||
void ToolCallParser::update(const QString &update)
|
||||
void ToolCallParser::update(const QByteArray &update)
|
||||
{
|
||||
currentBuffer().append(update);
|
||||
|
||||
for (size_t i = currentBuffer().size() - update.size(); i < currentBuffer().size(); ++i) {
|
||||
const QChar c = currentBuffer()[i];
|
||||
const char c = currentBuffer()[i];
|
||||
const bool foundMatch = isExpected(c);
|
||||
if (!foundMatch) {
|
||||
resetSearchState();
|
||||
@ -100,7 +100,7 @@ void ToolCallParser::update(const QString &update)
|
||||
case ToolEnums::ParseState::InTagChoice:
|
||||
{
|
||||
for (int i = 0; i < m_possibleStartTags.size(); ++i) {
|
||||
const QString tag = m_possibleStartTags.at(i);
|
||||
const auto &tag = m_possibleStartTags.at(i);
|
||||
if (c == tag.at(1)) m_currentTagIndex = i;
|
||||
}
|
||||
if (m_currentTagIndex >= 0) {
|
||||
@ -115,7 +115,7 @@ void ToolCallParser::update(const QString &update)
|
||||
{
|
||||
m_startTagBuffer.append(c);
|
||||
|
||||
const QString startTag = this->startTag();
|
||||
const auto startTag = this->startTag();
|
||||
Q_ASSERT(!startTag.isEmpty());
|
||||
if (m_expectedIndex == startTag.size() - 1) {
|
||||
m_expectedIndex = 0;
|
||||
@ -131,7 +131,7 @@ void ToolCallParser::update(const QString &update)
|
||||
case ToolEnums::ParseState::Partial:
|
||||
{
|
||||
Q_ASSERT(m_currentTagIndex >= 0);
|
||||
const QString endTag = this->endTag();
|
||||
const auto endTag = this->endTag();
|
||||
Q_ASSERT(!endTag.isEmpty());
|
||||
m_toolCall.append(c);
|
||||
m_endTagBuffer.append(c);
|
||||
@ -159,8 +159,8 @@ bool ToolCallParser::splitIfPossible()
|
||||
// The first split happens when we're in a partial state
|
||||
if (m_buffers.size() < 2 && m_state == ToolEnums::ParseState::Partial) {
|
||||
Q_ASSERT(m_startIndex >= 0);
|
||||
const QString beforeToolCall = currentBuffer().left(m_startIndex);
|
||||
const QString toolCall = currentBuffer().mid(m_startIndex);
|
||||
const auto beforeToolCall = currentBuffer().left(m_startIndex);
|
||||
const auto toolCall = currentBuffer().mid (m_startIndex);
|
||||
m_buffers = { beforeToolCall, toolCall };
|
||||
return true;
|
||||
}
|
||||
@ -168,9 +168,9 @@ bool ToolCallParser::splitIfPossible()
|
||||
// The second split happens when we're in the complete state
|
||||
if (m_buffers.size() < 3 && m_state == ToolEnums::ParseState::Complete) {
|
||||
Q_ASSERT(m_endIndex >= 0);
|
||||
const QString beforeToolCall = m_buffers.first();
|
||||
const QString toolCall = currentBuffer().left(m_endIndex);
|
||||
const QString afterToolCall = currentBuffer().mid(m_endIndex);
|
||||
const auto &beforeToolCall = m_buffers.first();
|
||||
const auto toolCall = currentBuffer().left(m_endIndex);
|
||||
const auto afterToolCall = currentBuffer().mid (m_endIndex);
|
||||
m_buffers = { beforeToolCall, toolCall, afterToolCall };
|
||||
return true;
|
||||
}
|
||||
@ -178,7 +178,11 @@ bool ToolCallParser::splitIfPossible()
|
||||
return false;
|
||||
}
|
||||
|
||||
const QVector<QString> &ToolCallParser::buffers() const
|
||||
QStringList ToolCallParser::buffers() const
|
||||
{
|
||||
return m_buffers;
|
||||
QStringList result;
|
||||
result.reserve(m_buffers.size());
|
||||
for (const auto &buffer : m_buffers)
|
||||
result << QString::fromUtf8(buffer);
|
||||
return result;
|
||||
}
|
||||
|
@ -3,9 +3,10 @@
|
||||
|
||||
#include "tool.h"
|
||||
|
||||
#include <QChar>
|
||||
#include <QByteArray>
|
||||
#include <QList>
|
||||
#include <QString>
|
||||
#include <QPair>
|
||||
#include <QStringList>
|
||||
|
||||
namespace ToolCallConstants
|
||||
{
|
||||
@ -25,34 +26,34 @@ class ToolCallParser
|
||||
public:
|
||||
ToolCallParser();
|
||||
void reset();
|
||||
void update(const QString &update);
|
||||
QString toolCall() const { return m_toolCall; }
|
||||
void update(const QByteArray &update);
|
||||
QString toolCall() const { return QString::fromUtf8(m_toolCall); }
|
||||
int startIndex() const { return m_startIndex; }
|
||||
ToolEnums::ParseState state() const { return m_state; }
|
||||
QString startTag() const;
|
||||
QString endTag() const;
|
||||
QByteArray startTag() const;
|
||||
QByteArray endTag() const;
|
||||
|
||||
bool splitIfPossible();
|
||||
const QVector<QString> &buffers() const;
|
||||
QStringList buffers() const;
|
||||
int numberOfBuffers() const { return m_buffers.size(); }
|
||||
|
||||
private:
|
||||
QString &currentBuffer();
|
||||
QByteArray &currentBuffer();
|
||||
void resetSearchState();
|
||||
bool isExpected(QChar c) const;
|
||||
void setExpected(const QStringList &tags);
|
||||
bool isExpected(char c) const;
|
||||
void setExpected(const QList<QByteArray> &tags);
|
||||
|
||||
QStringList m_possibleStartTags;
|
||||
QStringList m_possibleEndTags;
|
||||
QString m_startTagBuffer;
|
||||
QString m_endTagBuffer;
|
||||
QList<QByteArray> m_possibleStartTags;
|
||||
QList<QByteArray> m_possibleEndTags;
|
||||
QByteArray m_startTagBuffer;
|
||||
QByteArray m_endTagBuffer;
|
||||
int m_currentTagIndex;
|
||||
|
||||
QVector<QChar> m_expected;
|
||||
QList<char> m_expected;
|
||||
int m_expectedIndex;
|
||||
ToolEnums::ParseState m_state;
|
||||
QVector<QString> m_buffers;
|
||||
QString m_toolCall;
|
||||
QList<QByteArray> m_buffers;
|
||||
QByteArray m_toolCall;
|
||||
int m_startIndex;
|
||||
int m_endIndex;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user