diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md
index 17ae40b4..c153bf96 100644
--- a/gpt4all-chat/CHANGELOG.md
+++ b/gpt4all-chat/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [Unreleased]
+
+### Fixed
+- Fix LocalDocs not using information from sources in v3.5.2 ([#3302](https://github.com/nomic-ai/gpt4all/pull/3302))
+
 ## [3.5.2] - 2024-12-13
 
 ### Added
@@ -223,6 +228,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
 - Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
 
+[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.5.2...HEAD
 [3.5.2]: https://github.com/nomic-ai/gpt4all/compare/v3.5.1...v3.5.2
 [3.5.1]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0...v3.5.1
 [3.5.0]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0-rc2...v3.5.0
diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt
index c73237b1..2f1d7c42 100644
--- a/gpt4all-chat/CMakeLists.txt
+++ b/gpt4all-chat/CMakeLists.txt
@@ -4,9 +4,9 @@ include(../common/common.cmake)
 
 set(APP_VERSION_MAJOR 3)
 set(APP_VERSION_MINOR 5)
-set(APP_VERSION_PATCH 2)
+set(APP_VERSION_PATCH 3)
 set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
-set(APP_VERSION "${APP_VERSION_BASE}")
+set(APP_VERSION "${APP_VERSION_BASE}-dev0")
 
 project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
 
diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp
index 8d2f11ad..408f9f3d 100644
--- a/gpt4all-chat/src/chatllm.cpp
+++ b/gpt4all-chat/src/chatllm.cpp
@@ -852,32 +852,29 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const ChatItem> items) const
 }
 
 auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                 std::optional<QList<ChatItem>> chat) -> ChatPromptResult
+                                 std::optional<std::pair<qsizetype, qsizetype>> subrange) -> ChatPromptResult
 {
     Q_ASSERT(isModelLoaded());
     Q_ASSERT(m_chatModel);
 
+    // Return a (ChatModelAccessor, std::span) pair where the span represents the relevant messages for this chat.
+    // "subrange" is used to select only local server messages from the current chat session.
+    auto getChat = [&]() {
+        auto items = m_chatModel->chatItems(); // holds lock
+        std::span view(items);
+        if (subrange)
+            view = view.subspan(subrange->first, subrange->second);
+        Q_ASSERT(view.size() >= 2);
+        return std::pair(std::move(items), view);
+    };
+
     // copy messages for safety (since we can't hold the lock the whole time)
     std::optional<std::pair<qsizetype, QString>> query;
-    std::vector<ChatItem> chatItems;
     {
-        std::optional<ChatModelAccessor> items;
-        std::span<const ChatItem> view;
-        if (chat) {
-            view = *chat;
-        } else {
-            items = m_chatModel->chatItems(); // holds lock
-            Q_ASSERT(!items->empty());
-            view = *items;
-        }
-        Q_ASSERT(view.size() >= 2); // should be prompt/response pairs
-
         // Find the prompt that represents the query. Server chats are flexible and may not have one.
-        auto response = view.end() - 1;
-        if (auto peer = m_chatModel->getPeer(view, response))
+        auto [_, view] = getChat(); // holds lock
+        if (auto peer = m_chatModel->getPeer(view, view.end() - 1)) // peer of response
             query = { *peer - view.begin(), (*peer)->value };
-
-        chatItems.assign(view.begin(), view.end() - 1); // exclude last
     }
 
     QList<ResultInfo> databaseResults;
@@ -889,6 +886,13 @@ auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LL
         emit databaseResultsChanged(databaseResults);
     }
 
+    // copy messages for safety (since we can't hold the lock the whole time)
+    std::vector<ChatItem> chatItems;
+    {
+        auto [_, view] = getChat(); // holds lock
+        chatItems.assign(view.begin(), view.end() - 1); // exclude new response
+    }
+
     auto result = promptInternal(chatItems, ctx, !databaseResults.isEmpty());
     return {
         /*PromptResult*/ {
diff --git a/gpt4all-chat/src/chatllm.h b/gpt4all-chat/src/chatllm.h
index 1188de88..0d05de87 100644
--- a/gpt4all-chat/src/chatllm.h
+++ b/gpt4all-chat/src/chatllm.h
@@ -251,7 +251,7 @@ protected:
     };
 
     ChatPromptResult promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                        std::optional<QList<ChatItem>> chat = {});
+                                        std::optional<std::pair<qsizetype, qsizetype>> subrange = {});
     // passing a string_view directly skips templating and uses the raw string
    PromptResult promptInternal(const std::variant<std::span<const ChatItem>, std::string_view> &prompt,
                                const LLModel::PromptContext &ctx,
diff --git a/gpt4all-chat/src/chatmodel.h b/gpt4all-chat/src/chatmodel.h
index 036150dc..3cdb3ccf 100644
--- a/gpt4all-chat/src/chatmodel.h
+++ b/gpt4all-chat/src/chatmodel.h
@@ -362,7 +362,8 @@ public:
 
     // Used by Server to append a new conversation to the chat log.
     // Appends a new, blank response to the end of the input list.
-    void appendResponseWithHistory(QList<ChatItem> &history)
+    // Returns an (offset, count) pair representing the indices of the appended items, including the new response.
+    std::pair<qsizetype, qsizetype> appendResponseWithHistory(QList<ChatItem> &history)
     {
         if (history.empty())
             throw std::invalid_argument("at least one message is required");
@@ -378,9 +379,11 @@ public:
         beginInsertRows(QModelIndex(), startIndex, endIndex - 1 /*inclusive*/);
         bool hadError;
         QList<ChatItem> newItems;
+        std::pair<qsizetype, qsizetype> subrange;
         {
             QMutexLocker locker(&m_mutex);
             hadError = hasErrorUnlocked();
+            subrange = { m_chatItems.size(), history.size() };
             m_chatItems.reserve(m_chatItems.size() + history.size());
             for (auto &item : history)
                 m_chatItems << item;
@@ -390,6 +393,7 @@ public:
         // Server can add messages when there is an error because each call is a new conversation
         if (hadError)
             emit hasErrorChanged(false);
+        return subrange;
     }
 
     void truncate(qsizetype size)
diff --git a/gpt4all-chat/src/server.cpp b/gpt4all-chat/src/server.cpp
index f9b7ae41..222f793c 100644
--- a/gpt4all-chat/src/server.cpp
+++ b/gpt4all-chat/src/server.cpp
@@ -781,7 +781,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
         case Assistant: chatItems.emplace_back(ChatItem::response_tag, message.content); break;
         }
     }
-    m_chatModel->appendResponseWithHistory(chatItems);
+    auto subrange = m_chatModel->appendResponseWithHistory(chatItems);
 
     // FIXME(jared): taking parameters from the UI inhibits reproducibility of results
     LLModel::PromptContext promptCtx {
@@ -801,7 +801,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
     for (int i = 0; i < request.n; ++i) {
         ChatPromptResult result;
         try {
-            result = promptInternalChat(m_collections, promptCtx, chatItems);
+            result = promptInternalChat(m_collections, promptCtx, subrange);
         } catch (const std::exception &e) {
            emit responseChanged(e.what());
            emit responseStopped(0);
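
For context on the pattern this patch introduces: instead of handing `promptInternalChat` a copy of the server's local message list, the server now keeps only the (offset, count) subrange returned by `appendResponseWithHistory` and passes that back, so the history that is ultimately prompted is re-read from the live chat model under its lock after the LocalDocs retrieval step rather than taken from an earlier snapshot. The sketch below is a minimal, self-contained illustration of that idea using hypothetical names (`Message`, `MessageStore`, `appendWithResponse`, `withMessages`); it is not the actual gpt4all types.

```cpp
// Minimal sketch, not the real gpt4all classes: a mutex-guarded message list where
// appending a server conversation returns an (offset, count) pair, and every reader
// re-acquires the lock and re-slices the live list instead of caching a snapshot.
#include <cassert>
#include <cstddef>
#include <mutex>
#include <optional>
#include <span>
#include <string>
#include <utility>
#include <vector>

struct Message { std::string role, content; };

class MessageStore {
public:
    // Append a batch of messages plus a blank assistant response; return the
    // (offset, count) pair identifying the appended items, response included.
    std::pair<std::size_t, std::size_t> appendWithResponse(const std::vector<Message> &history)
    {
        std::lock_guard lock(m_mutex);
        std::pair<std::size_t, std::size_t> subrange { m_items.size(), history.size() + 1 };
        m_items.insert(m_items.end(), history.begin(), history.end());
        m_items.push_back({"assistant", ""}); // blank response, filled in as generation runs
        return subrange;
    }

    // Run `fn` over a span of the current messages while the lock is held.
    // If a subrange is given, only that slice of the conversation is visible.
    template <typename Fn>
    void withMessages(std::optional<std::pair<std::size_t, std::size_t>> subrange, Fn &&fn)
    {
        std::lock_guard lock(m_mutex);
        std::span<const Message> view(m_items);
        if (subrange)
            view = view.subspan(subrange->first, subrange->second);
        assert(view.size() >= 2); // at least one prompt plus the blank response
        fn(view);
    }

private:
    std::mutex m_mutex;
    std::vector<Message> m_items;
};
```

A server-style caller would hold on to nothing but the pair returned by `appendWithResponse` and hand it back to `withMessages` for each read, which mirrors how server.cpp now forwards the `subrange` from `appendResponseWithHistory` into `promptInternalChat`.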