chat: fix localdocs breakage in v3.5.2 (#3302)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
commit db5800356b
parent 38d92cbb28
gpt4all-chat/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [Unreleased]
+
+### Fixed
+- Fix LocalDocs not using information from sources in v3.5.2 ([#3302](https://github.com/nomic-ai/gpt4all/pull/3302))
+
 ## [3.5.2] - 2024-12-13
 
 ### Added
@@ -223,6 +228,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
 - Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
 
+[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.5.2...HEAD
 [3.5.2]: https://github.com/nomic-ai/gpt4all/compare/v3.5.1...v3.5.2
 [3.5.1]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0...v3.5.1
 [3.5.0]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0-rc2...v3.5.0
gpt4all-chat/CMakeLists.txt
@@ -4,9 +4,9 @@ include(../common/common.cmake)
 
 set(APP_VERSION_MAJOR 3)
 set(APP_VERSION_MINOR 5)
-set(APP_VERSION_PATCH 2)
+set(APP_VERSION_PATCH 3)
 set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
-set(APP_VERSION "${APP_VERSION_BASE}")
+set(APP_VERSION "${APP_VERSION_BASE}-dev0")
 
 project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
 
gpt4all-chat/src/chatllm.cpp
@@ -852,32 +852,29 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const ChatItem> items) const
 }
 
 auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                 std::optional<QList<ChatItem>> chat) -> ChatPromptResult
+                                 std::optional<std::pair<int, int>> subrange) -> ChatPromptResult
 {
     Q_ASSERT(isModelLoaded());
     Q_ASSERT(m_chatModel);
 
+    // Return a (ChatModelAccessor, std::span) pair where the span represents the relevant messages for this chat.
+    // "subrange" is used to select only local server messages from the current chat session.
+    auto getChat = [&]() {
+        auto items = m_chatModel->chatItems(); // holds lock
+        std::span view(items);
+        if (subrange)
+            view = view.subspan(subrange->first, subrange->second);
+        Q_ASSERT(view.size() >= 2);
+        return std::pair(std::move(items), view);
+    };
+
     // copy messages for safety (since we can't hold the lock the whole time)
     std::optional<std::pair<int, QString>> query;
-    std::vector<ChatItem> chatItems;
     {
-        std::optional<ChatModelAccessor> items;
-        std::span<const ChatItem> view;
-        if (chat) {
-            view = *chat;
-        } else {
-            items = m_chatModel->chatItems(); // holds lock
-            Q_ASSERT(!items->empty());
-            view = *items;
-        }
-        Q_ASSERT(view.size() >= 2); // should be prompt/response pairs
-
         // Find the prompt that represents the query. Server chats are flexible and may not have one.
-        auto response = view.end() - 1;
-        if (auto peer = m_chatModel->getPeer(view, response))
+        auto [_, view] = getChat(); // holds lock
+        if (auto peer = m_chatModel->getPeer(view, view.end() - 1)) // peer of response
             query = { *peer - view.begin(), (*peer)->value };
-
-        chatItems.assign(view.begin(), view.end() - 1); // exclude last
     }
 
     QList<ResultInfo> databaseResults;
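For context, the new getChat() helper re-reads the chat model on every call and, for server chats, narrows the view to an (offset, count) window. Below is a minimal sketch of that selection step, not part of this commit; Item and selectWindow are hypothetical stand-ins for ChatItem and the lambda.

    #include <cassert>
    #include <optional>
    #include <span>
    #include <utility>
    #include <vector>

    struct Item { int id; }; // hypothetical stand-in for ChatItem

    // Mirrors the windowing step inside getChat(): view the whole log, or only
    // the (offset, count) window holding this server request's messages.
    std::span<const Item> selectWindow(std::span<const Item> all,
                                       std::optional<std::pair<int, int>> subrange)
    {
        if (subrange)
            return all.subspan(subrange->first, subrange->second);
        return all;
    }

    int main()
    {
        std::vector<Item> log {{0}, {1}, {2}, {3}, {4}};
        auto view = selectWindow(log, std::pair(2, 3)); // selects items 2..4
        assert(view.size() == 3 && view.front().id == 2);
        assert(view.back().id == 4);
    }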
@@ -889,6 +886,13 @@ auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LL
         emit databaseResultsChanged(databaseResults);
     }
 
+    // copy messages for safety (since we can't hold the lock the whole time)
+    std::vector<ChatItem> chatItems;
+    {
+        auto [_, view] = getChat(); // holds lock
+        chatItems.assign(view.begin(), view.end() - 1); // exclude new response
+    }
+
     auto result = promptInternal(chatItems, ctx, !databaseResults.isEmpty());
     return {
         /*PromptResult*/ {
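This hunk is the substance of the LocalDocs fix: the snapshot handed to promptInternal is now taken after retrieval has run, so it can reflect the source information attached to the latest prompt. A short sketch of the copy-under-lock pattern the comment describes, assuming a simple mutex-guarded history; History and promptModel are hypothetical, not gpt4all APIs.

    #include <mutex>
    #include <string>
    #include <vector>

    // Hold the lock only long enough to copy the shared history,
    // then do the slow prompting work on the private copy.
    struct History {
        std::mutex mutex;
        std::vector<std::string> items;

        std::vector<std::string> snapshot()
        {
            std::scoped_lock lock(mutex);
            return items; // copied under the lock
        }
    };

    void promptModel(const std::vector<std::string> &messages) { /* slow work */ }

    void prompt(History &history)
    {
        auto messages = history.snapshot(); // brief critical section
        messages.pop_back();                // exclude the new, blank response
        promptModel(messages);              // long-running, lock already released
    }

    int main()
    {
        History h;
        h.items = {"prompt", "response"};
        prompt(h);
    }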
gpt4all-chat/src/chatllm.h
@@ -251,7 +251,7 @@
     };
 
     ChatPromptResult promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                        std::optional<QList<ChatItem>> chat = {});
+                                        std::optional<std::pair<int, int>> subrange = {});
     // passing a string_view directly skips templating and uses the raw string
     PromptResult promptInternal(const std::variant<std::span<const ChatItem>, std::string_view> &prompt,
                                 const LLModel::PromptContext &ctx,
gpt4all-chat/src/chatmodel.h
@@ -362,7 +362,8 @@
 
     // Used by Server to append a new conversation to the chat log.
     // Appends a new, blank response to the end of the input list.
-    void appendResponseWithHistory(QList<ChatItem> &history)
+    // Returns an (offset, count) pair representing the indices of the appended items, including the new response.
+    std::pair<int, int> appendResponseWithHistory(QList<ChatItem> &history)
     {
         if (history.empty())
             throw std::invalid_argument("at least one message is required");
@@ -378,9 +379,11 @@
         beginInsertRows(QModelIndex(), startIndex, endIndex - 1 /*inclusive*/);
         bool hadError;
         QList<ChatItem> newItems;
+        std::pair<int, int> subrange;
         {
             QMutexLocker locker(&m_mutex);
             hadError = hasErrorUnlocked();
+            subrange = { m_chatItems.size(), history.size() };
             m_chatItems.reserve(m_chatItems.size() + history.size());
             for (auto &item : history)
                 m_chatItems << item;
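The subrange recorded here is plain bookkeeping: the offset is the model's size before the append, and the count is the number of items appended (the history already ends with the new, blank response). A tiny sketch with ints standing in for ChatItem; appendWithSubrange is hypothetical.

    #include <cassert>
    #include <utility>
    #include <vector>

    // offset = log size before the append; count = number of appended items.
    std::pair<int, int> appendWithSubrange(std::vector<int> &log,
                                           const std::vector<int> &history)
    {
        std::pair<int, int> subrange(int(log.size()), int(history.size()));
        log.insert(log.end(), history.begin(), history.end());
        return subrange;
    }

    int main()
    {
        std::vector<int> log {10, 11};         // messages from earlier requests
        std::vector<int> history {20, 21, 22}; // new conversation + blank response
        auto [offset, count] = appendWithSubrange(log, history);
        assert(offset == 2 && count == 3);
        assert(int(log.size()) == offset + count);
    }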
@@ -390,6 +393,7 @@
         // Server can add messages when there is an error because each call is a new conversation
         if (hadError)
             emit hasErrorChanged(false);
+        return subrange;
     }
 
     void truncate(qsizetype size)
gpt4all-chat/src/server.cpp
@@ -781,7 +781,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
             case Assistant: chatItems.emplace_back(ChatItem::response_tag, message.content); break;
         }
     }
-    m_chatModel->appendResponseWithHistory(chatItems);
+    auto subrange = m_chatModel->appendResponseWithHistory(chatItems);
 
     // FIXME(jared): taking parameters from the UI inhibits reproducibility of results
     LLModel::PromptContext promptCtx {
@@ -801,7 +801,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
     for (int i = 0; i < request.n; ++i) {
         ChatPromptResult result;
         try {
-            result = promptInternalChat(m_collections, promptCtx, chatItems);
+            result = promptInternalChat(m_collections, promptCtx, subrange);
         } catch (const std::exception &e) {
            emit responseChanged(e.what());
            emit responseStopped(0);
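Taken together, the server-side changes close the loop: handleChatRequest appends the request's messages (plus a blank response) to the shared chat model, keeps the (offset, count) subrange that appendResponseWithHistory now returns, and passes that subrange to promptInternalChat, which re-reads the live items for each of the request.n generations instead of working from a copy made before LocalDocs retrieval ran.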