diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md
index 17ae40b4..c153bf96 100644
--- a/gpt4all-chat/CHANGELOG.md
+++ b/gpt4all-chat/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [Unreleased]
+
+### Fixed
+- Fix LocalDocs not using information from sources in v3.5.2 ([#3302](https://github.com/nomic-ai/gpt4all/pull/3302))
+
 ## [3.5.2] - 2024-12-13
 
 ### Added
@@ -223,6 +228,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
 - Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
 
+[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.5.2...HEAD
 [3.5.2]: https://github.com/nomic-ai/gpt4all/compare/v3.5.1...v3.5.2
 [3.5.1]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0...v3.5.1
 [3.5.0]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0-rc2...v3.5.0
diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt
index c73237b1..2f1d7c42 100644
--- a/gpt4all-chat/CMakeLists.txt
+++ b/gpt4all-chat/CMakeLists.txt
@@ -4,9 +4,9 @@ include(../common/common.cmake)
 
 set(APP_VERSION_MAJOR 3)
 set(APP_VERSION_MINOR 5)
-set(APP_VERSION_PATCH 2)
+set(APP_VERSION_PATCH 3)
 set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
-set(APP_VERSION "${APP_VERSION_BASE}")
+set(APP_VERSION "${APP_VERSION_BASE}-dev0")
 
 project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
 
diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp
index 8d2f11ad..408f9f3d 100644
--- a/gpt4all-chat/src/chatllm.cpp
+++ b/gpt4all-chat/src/chatllm.cpp
@@ -852,32 +852,29 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const ChatItem> items) const
 }
 
 auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                 std::optional<QList<ChatItem>> chat) -> ChatPromptResult
+                                 std::optional<std::pair<qsizetype, qsizetype>> subrange) -> ChatPromptResult
 {
     Q_ASSERT(isModelLoaded());
     Q_ASSERT(m_chatModel);
 
+    // Return a (ChatModelAccessor, std::span) pair where the span represents the relevant messages for this chat.
+    // "subrange" is used to select only local server messages from the current chat session.
+    auto getChat = [&]() {
+        auto items = m_chatModel->chatItems(); // holds lock
+        std::span view(items);
+        if (subrange)
+            view = view.subspan(subrange->first, subrange->second);
+        Q_ASSERT(view.size() >= 2);
+        return std::pair(std::move(items), view);
+    };
+
     // copy messages for safety (since we can't hold the lock the whole time)
     std::optional<std::pair<qsizetype, QString>> query;
-    std::vector<ChatItem> chatItems;
     {
-        std::optional<ChatModelAccessor> items;
-        std::span<const ChatItem> view;
-        if (chat) {
-            view = *chat;
-        } else {
-            items = m_chatModel->chatItems(); // holds lock
-            Q_ASSERT(!items->empty());
-            view = *items;
-        }
-        Q_ASSERT(view.size() >= 2); // should be prompt/response pairs
-
         // Find the prompt that represents the query. Server chats are flexible and may not have one.
-        auto response = view.end() - 1;
-        if (auto peer = m_chatModel->getPeer(view, response))
+        auto [_, view] = getChat(); // holds lock
+        if (auto peer = m_chatModel->getPeer(view, view.end() - 1)) // peer of response
             query = { *peer - view.begin(), (*peer)->value };
-
-        chatItems.assign(view.begin(), view.end() - 1); // exclude last
     }
 
     QList<ResultInfo> databaseResults;
@@ -889,6 +886,13 @@ auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LL
         emit databaseResultsChanged(databaseResults);
     }
 
+    // copy messages for safety (since we can't hold the lock the whole time)
+    std::vector<ChatItem> chatItems;
+    {
+        auto [_, view] = getChat(); // holds lock
+        chatItems.assign(view.begin(), view.end() - 1); // exclude new response
+    }
+
     auto result = promptInternal(chatItems, ctx, !databaseResults.isEmpty());
     return {
         /*PromptResult*/ {
diff --git a/gpt4all-chat/src/chatllm.h b/gpt4all-chat/src/chatllm.h
index 1188de88..0d05de87 100644
--- a/gpt4all-chat/src/chatllm.h
+++ b/gpt4all-chat/src/chatllm.h
@@ -251,7 +251,7 @@ protected:
     };
 
     ChatPromptResult promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                        std::optional<QList<ChatItem>> chat = {});
+                                        std::optional<std::pair<qsizetype, qsizetype>> subrange = {});
     // passing a string_view directly skips templating and uses the raw string
    PromptResult promptInternal(const std::variant<std::span<const ChatItem>, std::string_view> &prompt,
                                const LLModel::PromptContext &ctx,
diff --git a/gpt4all-chat/src/chatmodel.h b/gpt4all-chat/src/chatmodel.h
index 036150dc..3cdb3ccf 100644
--- a/gpt4all-chat/src/chatmodel.h
+++ b/gpt4all-chat/src/chatmodel.h
@@ -362,7 +362,8 @@ public:
 
     // Used by Server to append a new conversation to the chat log.
     // Appends a new, blank response to the end of the input list.
-    void appendResponseWithHistory(QList<ChatItem> &history)
+    // Returns an (offset, count) pair representing the indices of the appended items, including the new response.
+    std::pair<qsizetype, qsizetype> appendResponseWithHistory(QList<ChatItem> &history)
     {
         if (history.empty())
             throw std::invalid_argument("at least one message is required");
@@ -378,9 +379,11 @@ public:
         beginInsertRows(QModelIndex(), startIndex, endIndex - 1 /*inclusive*/);
         bool hadError;
         QList<ChatItem> newItems;
+        std::pair<qsizetype, qsizetype> subrange;
         {
             QMutexLocker locker(&m_mutex);
             hadError = hasErrorUnlocked();
+            subrange = { m_chatItems.size(), history.size() };
             m_chatItems.reserve(m_chatItems.size() + history.size());
             for (auto &item : history)
                 m_chatItems << item;
@@ -390,6 +393,7 @@ public:
         // Server can add messages when there is an error because each call is a new conversation
         if (hadError)
             emit hasErrorChanged(false);
+        return subrange;
     }
 
     void truncate(qsizetype size)
diff --git a/gpt4all-chat/src/server.cpp b/gpt4all-chat/src/server.cpp
index f9b7ae41..222f793c 100644
--- a/gpt4all-chat/src/server.cpp
+++ b/gpt4all-chat/src/server.cpp
@@ -781,7 +781,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
         case Assistant: chatItems.emplace_back(ChatItem::response_tag, message.content); break;
         }
     }
-    m_chatModel->appendResponseWithHistory(chatItems);
+    auto subrange = m_chatModel->appendResponseWithHistory(chatItems);
 
     // FIXME(jared): taking parameters from the UI inhibits reproducibility of results
     LLModel::PromptContext promptCtx {
@@ -801,7 +801,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
     for (int i = 0; i < request.n; ++i) {
         ChatPromptResult result;
         try {
-            result = promptInternalChat(m_collections, promptCtx, chatItems);
+            result = promptInternalChat(m_collections, promptCtx, subrange);
         } catch (const std::exception &e) {
            emit responseChanged(e.what());
            emit responseStopped(0);
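
For context on the pattern this patch introduces: instead of handing `promptInternalChat` a copy of the server's local message list, the server now keeps only the (offset, count) subrange returned by `appendResponseWithHistory` and passes that back, so the history that is ultimately prompted is re-read from the live chat model under its lock after the LocalDocs retrieval step rather than taken from an earlier snapshot. The sketch below is a minimal, self-contained illustration of that idea using hypothetical names (`Message`, `MessageStore`, `appendWithResponse`, `withMessages`); it is not the actual gpt4all types.

```cpp
// Minimal sketch, not the real gpt4all classes: a mutex-guarded message list where
// appending a server conversation returns an (offset, count) pair, and every reader
// re-acquires the lock and re-slices the live list instead of caching a snapshot.
#include <cassert>
#include <cstddef>
#include <mutex>
#include <optional>
#include <span>
#include <string>
#include <utility>
#include <vector>

struct Message { std::string role, content; };

class MessageStore {
public:
    // Append a batch of messages plus a blank assistant response; return the
    // (offset, count) pair identifying the appended items, response included.
    std::pair<std::size_t, std::size_t> appendWithResponse(const std::vector<Message> &history)
    {
        std::lock_guard lock(m_mutex);
        std::pair<std::size_t, std::size_t> subrange { m_items.size(), history.size() + 1 };
        m_items.insert(m_items.end(), history.begin(), history.end());
        m_items.push_back({"assistant", ""}); // blank response, filled in as generation runs
        return subrange;
    }

    // Run `fn` over a span of the current messages while the lock is held.
    // If a subrange is given, only that slice of the conversation is visible.
    template <typename Fn>
    void withMessages(std::optional<std::pair<std::size_t, std::size_t>> subrange, Fn &&fn)
    {
        std::lock_guard lock(m_mutex);
        std::span<const Message> view(m_items);
        if (subrange)
            view = view.subspan(subrange->first, subrange->second);
        assert(view.size() >= 2); // at least one prompt plus the blank response
        fn(view);
    }

private:
    std::mutex m_mutex;
    std::vector<Message> m_items;
};
```

A server-style caller would hold on to nothing but the pair returned by `appendWithResponse` and hand it back to `withMessages` for each read, which mirrors how server.cpp now forwards the `subrange` from `appendResponseWithHistory` into `promptInternalChat`.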