backend: rebase llama.cpp on upstream as of Sep 26th (#2998)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
@@ -9,6 +9,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 ### Added
 - Add bm25 hybrid search to localdocs ([#2969](https://github.com/nomic-ai/gpt4all/pull/2969))
 
+### Changed
+- Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
+
 ### Fixed
 - Fix a crash when attempting to continue a chat loaded from disk ([#2995](https://github.com/nomic-ai/gpt4all/pull/2995))
 - Fix the local server rejecting min\_p/top\_p less than 1 ([#2996](https://github.com/nomic-ai/gpt4all/pull/2996))
@@ -71,19 +71,19 @@ bool ChatAPI::isModelLoaded() const
 // All three of the state virtual functions are handled custom inside of chatllm save/restore
 size_t ChatAPI::stateSize() const
 {
-    return 0;
+    throw std::logic_error("not implemented");
 }
 
-size_t ChatAPI::saveState(uint8_t *dest) const
+size_t ChatAPI::saveState(std::span<uint8_t> dest) const
 {
     Q_UNUSED(dest);
-    return 0;
+    throw std::logic_error("not implemented");
 }
 
-size_t ChatAPI::restoreState(const uint8_t *src)
+size_t ChatAPI::restoreState(std::span<const uint8_t> src)
 {
     Q_UNUSED(src);
-    return 0;
+    throw std::logic_error("not implemented");
 }
 
 void ChatAPI::prompt(const std::string &prompt,
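The pointer-to-span change above is the core of the new state API: a bare `uint8_t *` carries no length, so an implementation cannot tell how large the caller's buffer is, while `std::span` bundles the pointer with its size. A minimal sketch of that shape (toy names, not code from this commit):

```cpp
// Sketch only: why std::span<uint8_t> is safer than uint8_t* for state I/O.
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

struct ToyState {
    std::vector<uint8_t> bytes{1, 2, 3};

    // Returns bytes written, or 0 if dest is too small -- a check that is
    // impossible with a bare uint8_t *dest parameter.
    size_t save(std::span<uint8_t> dest) const {
        if (dest.size() < bytes.size())
            return 0;
        std::memcpy(dest.data(), bytes.data(), bytes.size());
        return bytes.size();
    }

    size_t restore(std::span<const uint8_t> src) {
        bytes.assign(src.begin(), src.end());
        return src.size();
    }
};

int main() {
    ToyState a, b;
    std::vector<uint8_t> buf(3);
    if (a.save(buf)) // the vector converts to std::span implicitly
        b.restore(buf);
}
```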
@@ -64,8 +64,8 @@ public:
     bool isModelLoaded() const override;
     size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override;
     size_t stateSize() const override;
-    size_t saveState(uint8_t *dest) const override;
-    size_t restoreState(const uint8_t *src) override;
+    size_t saveState(std::span<uint8_t> dest) const override;
+    size_t restoreState(std::span<const uint8_t> src) override;
     void prompt(const std::string &prompt,
                 const std::string &promptTemplate,
                 std::function<bool(int32_t)> promptCallback,
@@ -118,12 +118,14 @@ protected:
         throw std::logic_error("not implemented");
     }
 
-    Token sampleToken(PromptContext &ctx) const override
+    void initSampler(PromptContext &ctx) override
     {
         (void)ctx;
         throw std::logic_error("not implemented");
     }
 
+    Token sampleToken() const override { throw std::logic_error("not implemented"); }
+
     bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override
     {
         (void)ctx;
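The hunk above splits the old single-call `sampleToken(PromptContext &)` into `initSampler(PromptContext &)` plus a parameterless `sampleToken()`, which appears to mirror upstream llama.cpp's reworked sampling interface: configure sampler state once, then query it per token (ChatAPI stubs both out, since sampling happens server-side). A hedged sketch of that two-phase pattern, with hypothetical names:

```cpp
// Hypothetical illustration of the init-then-sample split; not GPT4All code.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <random>
#include <vector>

using Token = std::int32_t;
struct PromptContext { float temperature = 0.8f; std::uint32_t seed = 42; };

class ToySampler {
    std::mt19937 m_rng;
    float m_temp = 1.0f;
public:
    // Phase 1: bind sampling parameters from the context (previously this
    // state had to be re-derived inside every sampleToken(ctx) call).
    void initSampler(const PromptContext &ctx) {
        m_rng.seed(ctx.seed);
        m_temp = ctx.temperature;
    }

    // Phase 2: draw a token; no context argument needed any more.
    Token sampleToken(std::vector<float> logits) {
        float maxL = *std::max_element(logits.begin(), logits.end());
        for (float &l : logits) // temperature-scaled softmax -> weights
            l = std::exp((l - maxL) / m_temp);
        std::discrete_distribution<> pick(logits.begin(), logits.end());
        return static_cast<Token>(pick(m_rng));
    }
};
```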
@@ -1174,7 +1174,13 @@ void ChatLLM::saveState()
 #if defined(DEBUG)
     qDebug() << "saveState" << m_llmThread.objectName() << "size:" << m_state.size();
 #endif
-    m_llModelInfo.model->saveState(static_cast<uint8_t*>(reinterpret_cast<void*>(m_state.data())));
+    bool ok = m_llModelInfo.model->saveState({reinterpret_cast<uint8_t *>(m_state.data()), size_t(m_state.size())});
+    if (!ok) {
+        // FIXME(jared): how badly does this situation break GPT4All?
+        qWarning() << "ChatLLM failed to save LLModel state";
+        m_state.clear();
+        m_state.squeeze();
+    }
 }
 
 void ChatLLM::restoreState()
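In `ChatLLM::saveState` above, `m_state` is a `QByteArray`, which stores `char`, while the new interface takes `std::span<uint8_t>`; hence the braced initializer with a `reinterpret_cast` plus an explicit length. A minimal non-Qt sketch of the same conversion, using `std::string` as a stand-in for `QByteArray`:

```cpp
// Sketch only: char buffer -> std::span<uint8_t>, mirroring the ChatLLM call.
#include <algorithm>
#include <cstdint>
#include <span>
#include <string>

// Stand-in for the span-taking saveState(); false means "buffer too small".
bool saveStateInto(std::span<uint8_t> dest) {
    static constexpr uint8_t kState[] = {1, 2, 3};
    if (dest.size() < sizeof kState)
        return false;
    std::copy(std::begin(kState), std::end(kState), dest.begin());
    return true;
}

int main() {
    std::string state(1024, '\0'); // plays the role of QByteArray m_state
    // char* -> uint8_t* needs a cast, and the size travels with the span:
    std::span<uint8_t> dest{reinterpret_cast<uint8_t *>(state.data()), state.size()};
    if (!saveStateInto(dest))
        state.clear(); // mirror the failure path: drop the stale buffer
}
```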
@@ -1183,7 +1189,7 @@ void ChatLLM::restoreState()
         return;
 
     if (m_llModelType == LLModelType::API_) {
-        QDataStream stream(&m_state, QIODeviceBase::ReadOnly);
+        QDataStream stream(m_state);
         stream.setVersion(QDataStream::Qt_6_4);
         ChatAPI *chatAPI = static_cast<ChatAPI*>(m_llModelInfo.model.get());
         QList<QString> context;
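The one-argument `QDataStream(const QByteArray &)` constructor opens the stream read-only over the byte array, so it is equivalent to, and terser than, the old `QDataStream(&m_state, QIODeviceBase::ReadOnly)` form. A small QtCore-only sketch (not from this commit):

```cpp
// Sketch only: the two ways to attach a QDataStream to a QByteArray.
#include <QByteArray>
#include <QDataStream>
#include <QString>

int main() {
    QByteArray state;
    {
        // Writing still needs the pointer form, since the array is modified.
        QDataStream out(&state, QIODeviceBase::WriteOnly);
        out.setVersion(QDataStream::Qt_6_4);
        out << QString("hello");
    }
    // Reading: the const-reference constructor is implicitly read-only.
    QDataStream in(state);
    in.setVersion(QDataStream::Qt_6_4);
    QString s;
    in >> s;
    return s == "hello" ? 0 : 1;
}
```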
@@ -1201,12 +1207,12 @@ void ChatLLM::restoreState()
     if (m_state.isEmpty())
         return;
 
-    if (m_llModelInfo.model->stateSize() == m_state.size()) {
-        m_llModelInfo.model->restoreState(static_cast<const uint8_t*>(reinterpret_cast<void*>(m_state.data())));
+    size_t bytesRead = m_llModelInfo.model->restoreState({reinterpret_cast<uint8_t *>(m_state.data()), size_t(m_state.size())});
+    if (bytesRead) {
         m_processedSystemPrompt = true;
         m_pristineLoadedState = true;
     } else {
-        qWarning() << "restoring state from text because" << m_llModelInfo.model->stateSize() << "!=" << m_state.size();
+        qWarning() << "restoring state from text because of error reading state (mismatch or corrupt data)";
         m_restoreStateFromText = true;
     }
 
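The last hunk changes how a failed restore is detected: instead of pre-checking `stateSize() == m_state.size()`, the caller attempts the restore and treats a zero byte count as failure (size mismatch or corrupt data), falling back to rebuilding state from the chat text. A compact sketch of that caller pattern, with hypothetical names:

```cpp
// Hypothetical caller pattern after #2998: validate by return value, not
// by comparing stateSize() up front.
#include <cstddef>
#include <cstdint>
#include <span>

struct ToyModel {
    static constexpr std::size_t kExpected = 16;
    // Returns bytes consumed, or 0 on mismatch/corruption.
    std::size_t restoreState(std::span<const std::uint8_t> src) {
        return src.size() == kExpected ? src.size() : 0; // toy validation
    }
};

bool tryRestore(ToyModel &m, std::span<const std::uint8_t> saved, bool &restoreFromText) {
    std::size_t bytesRead = m.restoreState(saved);
    if (bytesRead == 0) {
        restoreFromText = true; // fall back to replaying the chat transcript
        return false;
    }
    return true;
}
```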